From 57760dc633a1700316e7c3fc3427d82e89e19eec Mon Sep 17 00:00:00 2001 From: vinodkiran Date: Tue, 10 Oct 2023 20:03:21 +0530 Subject: [PATCH] Updates to Elasticsearch VectoreStore Functionality. --- .../ElectricsearchUserPassword.credential.ts | 9 +- .../Elasticsearch/ElasticSearchBase.ts | 193 ++++++++++++++++++ .../Elasticsearch/Elasticsearch_Existing.ts | 112 ++-------- .../Elasticsearch/Elasticsearch_Upsert.ts | 168 +++------------ 4 files changed, 245 insertions(+), 237 deletions(-) create mode 100644 packages/components/nodes/vectorstores/Elasticsearch/ElasticSearchBase.ts diff --git a/packages/components/credentials/ElectricsearchUserPassword.credential.ts b/packages/components/credentials/ElectricsearchUserPassword.credential.ts index 2dd88937..6c47f7b1 100644 --- a/packages/components/credentials/ElectricsearchUserPassword.credential.ts +++ b/packages/components/credentials/ElectricsearchUserPassword.credential.ts @@ -14,14 +14,19 @@ class ElasticSearchUserPassword implements INodeCredential { this.description = 'Refer to official guide on how to get User Password from ElasticSearch' this.inputs = [ + { + label: 'Cloud ID', + name: 'cloudId', + type: 'string' + }, { label: 'ElasticSearch User', - name: 'elasticSearchUser', + name: 'username', type: 'string' }, { label: 'ElasticSearch Password', - name: 'elasticSearchPassword', + name: 'password', type: 'password' } ] diff --git a/packages/components/nodes/vectorstores/Elasticsearch/ElasticSearchBase.ts b/packages/components/nodes/vectorstores/Elasticsearch/ElasticSearchBase.ts new file mode 100644 index 00000000..59294b7e --- /dev/null +++ b/packages/components/nodes/vectorstores/Elasticsearch/ElasticSearchBase.ts @@ -0,0 +1,193 @@ +import { + getBaseClasses, + getCredentialData, + getCredentialParam, + ICommonObject, + INodeData, + INodeOutputsValue, + INodeParams +} from '../../../src' +import { Client, ClientOptions } from '@elastic/elasticsearch' +import { ElasticClientArgs, ElasticVectorSearch } from 'langchain/vectorstores/elasticsearch' +import { Embeddings } from 'langchain/embeddings/base' +import { VectorStore } from 'langchain/vectorstores/base' +import { Document } from 'langchain/document' + +export abstract class ElasticSearchBase { + label: string + name: string + version: number + description: string + type: string + icon: string + category: string + baseClasses: string[] + inputs: INodeParams[] + credential: INodeParams + outputs: INodeOutputsValue[] + + protected constructor() { + this.type = 'Elasticsearch' + this.icon = 'elasticsearch.png' + this.category = 'Vector Stores' + this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever'] + this.credential = { + label: 'Connect Credential', + name: 'credential', + type: 'credential', + credentialNames: ['elasticsearchApi', 'elasticSearchUserPassword'] + } + this.inputs = [ + { + label: 'Embeddings', + name: 'embeddings', + type: 'Embeddings' + }, + { + label: 'Index Name', + name: 'indexName', + placeholder: '', + type: 'string' + }, + { + label: 'Top K', + name: 'topK', + description: 'Number of top results to fetch. Default to 4', + placeholder: '4', + type: 'number', + additionalParams: true, + optional: true + }, + { + label: 'Similarity', + name: 'similarity', + description: 'Similarity measure used in Elasticsearch.', + type: 'options', + default: 'l2_norm', + options: [ + { + label: 'l2_norm', + name: 'l2_norm' + }, + { + label: 'dot_product', + name: 'dot_product' + }, + { + label: 'cosine', + name: 'cosine' + } + ], + additionalParams: true, + optional: true + } + ] + this.outputs = [ + { + label: 'Elasticsearch Retriever', + name: 'retriever', + baseClasses: this.baseClasses + }, + { + label: 'Elasticsearch Vector Store', + name: 'vectorStore', + baseClasses: [this.type, ...getBaseClasses(ElasticVectorSearch)] + } + ] + } + + abstract constructVectorStore( + embeddings: Embeddings, + elasticSearchClientArgs: ElasticClientArgs, + docs: Document>[] | undefined + ): Promise + + async init(nodeData: INodeData, _: string, options: ICommonObject, docs: Document>[] | undefined): Promise { + const credentialData = await getCredentialData(nodeData.credential ?? '', options) + const endPoint = getCredentialParam('endpoint', credentialData, nodeData) + const cloudId = getCredentialParam('cloudId', credentialData, nodeData) + const indexName = nodeData.inputs?.indexName as string + const embeddings = nodeData.inputs?.embeddings as Embeddings + const topK = nodeData.inputs?.topK as string + const similarityMeasure = nodeData.inputs?.similarityMeasure as string + const k = topK ? parseFloat(topK) : 4 + const output = nodeData.outputs?.output as string + + const elasticSearchClientArgs = this.prepareClientArgs(endPoint, cloudId, credentialData, nodeData, similarityMeasure, indexName) + + const vectorStore = await this.constructVectorStore(embeddings, elasticSearchClientArgs, docs) + + if (output === 'retriever') { + return vectorStore.asRetriever(k) + } else if (output === 'vectorStore') { + ;(vectorStore as any).k = k + return vectorStore + } + return vectorStore + } + + protected prepareConnectionOptions( + endPoint: string | undefined, + cloudId: string | undefined, + credentialData: ICommonObject, + nodeData: INodeData + ) { + let elasticSearchClientOptions: ClientOptions = {} + if (endPoint) { + let apiKey = getCredentialParam('apiKey', credentialData, nodeData) + elasticSearchClientOptions = { + node: endPoint, + auth: { + apiKey: apiKey + } + } + } else if (cloudId) { + let username = getCredentialParam('username', credentialData, nodeData) + let password = getCredentialParam('password', credentialData, nodeData) + elasticSearchClientOptions = { + cloud: { + id: cloudId + }, + auth: { + username: username, + password: password + } + } + } + return elasticSearchClientOptions + } + + protected prepareClientArgs( + endPoint: string | undefined, + cloudId: string | undefined, + credentialData: ICommonObject, + nodeData: INodeData, + similarityMeasure: string, + indexName: string + ) { + let elasticSearchClientOptions = this.prepareConnectionOptions(endPoint, cloudId, credentialData, nodeData) + let vectorSearchOptions = {} + switch (similarityMeasure) { + case 'dot_product': + vectorSearchOptions = { + similarity: 'dot_product' + } + break + case 'cosine': + vectorSearchOptions = { + similarity: 'cosine' + } + break + default: + vectorSearchOptions = { + similarity: 'l2_norm' + } + } + const elasticSearchClientArgs: ElasticClientArgs = { + client: new Client(elasticSearchClientOptions), + indexName: indexName, + vectorSearchOptions: vectorSearchOptions + } + return elasticSearchClientArgs + } +} diff --git a/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Existing.ts b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Existing.ts index 6e785c85..94e45d74 100644 --- a/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Existing.ts +++ b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Existing.ts @@ -1,110 +1,30 @@ -import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface' +import { ICommonObject, INode, INodeData } from '../../../src/Interface' import { Embeddings } from 'langchain/embeddings/base' -import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src' -import { Client, ClientOptions } from '@elastic/elasticsearch' import { ElasticClientArgs, ElasticVectorSearch } from 'langchain/vectorstores/elasticsearch' +import { ElasticSearchBase } from './ElasticSearchBase' +import { VectorStore } from 'langchain/vectorstores/base' +import { Document } from 'langchain/document' -class ElasicsearchExisting_VectorStores implements INode { - label: string - name: string - version: number - description: string - type: string - icon: string - category: string - baseClasses: string[] - inputs: INodeParams[] - credential: INodeParams - outputs: INodeOutputsValue[] - +class ElasicsearchExisting_VectorStores extends ElasticSearchBase implements INode { constructor() { + super() this.label = 'Elasticsearch Load Existing Index' this.name = 'ElasticsearchIndex' this.version = 1.0 - this.type = 'Elasticsearch' - this.icon = 'elasticsearch.png' - this.category = 'Vector Stores' - this.description = 'Load existing index from Elasticsearch (i.e: Document has been upserted)' - this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever'] - this.credential = { - label: 'Connect Credential', - name: 'credential', - type: 'credential', - credentialNames: ['elasticsearchApi', 'elasticSearchUserPassword'] - } - this.inputs = [ - { - label: 'Embeddings', - name: 'embeddings', - type: 'Embeddings' - }, - { - label: 'Index Name', - name: 'indexName', - placeholder: '', - type: 'string' - }, - { - label: 'Top K', - name: 'topK', - description: 'Number of top results to fetch. Default to 4', - placeholder: '4', - type: 'number', - additionalParams: true, - optional: true - } - ] - this.outputs = [ - { - label: 'Elasticsearch Retriever', - name: 'retriever', - baseClasses: this.baseClasses - }, - { - label: 'Elasticsearch Vector Store', - name: 'vectorStore', - baseClasses: [this.type, ...getBaseClasses(ElasticVectorSearch)] - } - ] + this.description = 'Load existing index from Elasticsearch (i.e: Document has been upserted)' + } + + async constructVectorStore( + embeddings: Embeddings, + elasticSearchClientArgs: ElasticClientArgs, + docs: Document>[] | undefined + ): Promise { + return await ElasticVectorSearch.fromExistingIndex(embeddings, elasticSearchClientArgs) } async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { - const credentialData = await getCredentialData(nodeData.credential ?? '', options) - const endPoint = getCredentialParam('endpoint', credentialData, nodeData) - const apiKey = getCredentialParam('apiKey', credentialData, nodeData) - const indexName = nodeData.inputs?.indexName as string - const embeddings = nodeData.inputs?.embeddings as Embeddings - const topK = nodeData.inputs?.topK as string - - const k = topK ? parseFloat(topK) : 4 - const output = nodeData.outputs?.output as string - - // eslint-disable-next-line no-console - console.log('EndPoint:: ' + endPoint + ', APIKey:: ' + apiKey + ', Index:: ' + indexName) - - const elasticSearchClientOptions: ClientOptions = { - node: endPoint, - auth: { - apiKey: apiKey - } - } - - const elasticSearchClientArgs: ElasticClientArgs = { - client: new Client(elasticSearchClientOptions), - indexName: indexName - } - - const vectorStore = await ElasticVectorSearch.fromExistingIndex(embeddings, elasticSearchClientArgs) - // eslint-disable-next-line no-console - console.log('vectorStore ::' + vectorStore._vectorstoreType()) - if (output === 'retriever') { - return vectorStore.asRetriever(k) - } else if (output === 'vectorStore') { - ;(vectorStore as any).k = k - return vectorStore - } - return vectorStore + return super.init(nodeData, _, options, undefined) } } diff --git a/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Upsert.ts b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Upsert.ts index 5a0065d5..d4b79a5d 100644 --- a/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Upsert.ts +++ b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Upsert.ts @@ -1,148 +1,39 @@ -import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface' +import { ICommonObject, INode, INodeData } from '../../../src/Interface' import { Embeddings } from 'langchain/embeddings/base' import { Document } from 'langchain/document' -import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src' -import { Client, ClientOptions } from '@elastic/elasticsearch' import { ElasticClientArgs, ElasticVectorSearch } from 'langchain/vectorstores/elasticsearch' import { flatten } from 'lodash' +import { ElasticSearchBase } from './ElasticSearchBase' +import { VectorStore } from 'langchain/vectorstores/base' -class ElasicsearchUpsert_VectorStores implements INode { - label: string - name: string - version: number - description: string - type: string - icon: string - category: string - baseClasses: string[] - inputs: INodeParams[] - credential: INodeParams - outputs: INodeOutputsValue[] - +class ElasicsearchUpsert_VectorStores extends ElasticSearchBase implements INode { constructor() { + super() this.label = 'Elasticsearch Upsert Document' this.name = 'ElasticsearchUpsert' this.version = 1.0 - this.type = 'Elasticsearch' - this.icon = 'elasticsearch.png' - this.category = 'Vector Stores' this.description = 'Upsert documents to Elasticsearch' - this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever'] - this.credential = { - label: 'Connect Credential', - name: 'credential', - type: 'credential', - credentialNames: ['elasticsearchApi', 'elasticSearchUserPassword'] - } - this.inputs = [ - { - label: 'Document', - name: 'document', - type: 'Document', - list: true - }, - { - label: 'Embeddings', - name: 'embeddings', - type: 'Embeddings' - }, - { - label: 'Index Name', - name: 'indexName', - placeholder: '', - type: 'string' - }, - { - label: 'Top K', - name: 'topK', - description: 'Number of top results to fetch. Default to 4', - placeholder: '4', - type: 'number', - additionalParams: true, - optional: true - }, - { - label: 'Similarity', - name: 'similarity', - description: 'Similarity measure used in Elasticsearch.', - type: 'options', - default: 'l2_norm', - options: [ - { - label: 'l2_norm', - name: 'l2_norm' - }, - { - label: 'dot_product', - name: 'dot_product' - }, - { - label: 'cosine', - name: 'cosine' - } - ], - additionalParams: true, - optional: true - } - ] - this.outputs = [ - { - label: 'Elasticsearch Retriever', - name: 'retriever', - baseClasses: this.baseClasses - }, - { - label: 'Elasticsearch Vector Store', - name: 'vectorStore', - baseClasses: [this.type, ...getBaseClasses(ElasticVectorSearch)] - } - ] + this.inputs.unshift({ + label: 'Document', + name: 'document', + type: 'Document', + list: true + }) + } + + async constructVectorStore( + embeddings: Embeddings, + elasticSearchClientArgs: ElasticClientArgs, + docs: Document>[] + ): Promise { + const vectorStore = new ElasticVectorSearch(embeddings, elasticSearchClientArgs) + await vectorStore.addDocuments(docs) + return vectorStore } async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { - const credentialData = await getCredentialData(nodeData.credential ?? '', options) - const endPoint = getCredentialParam('endpoint', credentialData, nodeData) - const apiKey = getCredentialParam('apiKey', credentialData, nodeData) const docs = nodeData.inputs?.document as Document[] - const indexName = nodeData.inputs?.indexName as string - const embeddings = nodeData.inputs?.embeddings as Embeddings - const topK = nodeData.inputs?.topK as string - const k = topK ? parseFloat(topK) : 4 - const output = nodeData.outputs?.output as string - const similarityMeasure = nodeData.inputs?.similarityMeasure as string - - // eslint-disable-next-line no-console - console.log('EndPoint:: ' + endPoint + ', APIKey:: ' + apiKey + ', Index:: ' + indexName) - - const elasticSearchClientOptions: ClientOptions = { - node: endPoint, - auth: { - apiKey: apiKey - } - } - let vectorSearchOptions = {} - switch (similarityMeasure) { - case 'dot_product': - vectorSearchOptions = { - similarity: 'dot_product' - } - break - case 'cosine': - vectorSearchOptions = { - similarity: 'cosine' - } - break - default: - vectorSearchOptions = { - similarity: 'l2_norm' - } - } - const elasticSearchClientArgs: ElasticClientArgs = { - client: new Client(elasticSearchClientOptions), - indexName: indexName, - vectorSearchOptions: vectorSearchOptions - } const flattenDocs = docs && docs.length ? flatten(docs) : [] const finalDocs = [] @@ -150,15 +41,14 @@ class ElasicsearchUpsert_VectorStores implements INode { finalDocs.push(new Document(flattenDocs[i])) } - const vectorStore = await ElasticVectorSearch.fromDocuments(finalDocs, embeddings, elasticSearchClientArgs) - - if (output === 'retriever') { - return vectorStore.asRetriever(k) - } else if (output === 'vectorStore') { - ;(vectorStore as any).k = k - return vectorStore - } - return vectorStore + // The following code is a workaround for a bug (Langchain Issue #1589) in the underlying library. + // Store does not support object in metadata and fail silently + finalDocs.forEach((d) => { + delete d.metadata.pdf + delete d.metadata.loc + }) + // end of workaround + return super.init(nodeData, _, options, flattenDocs) } }