feat: integrate jina sum plugin

master
Han Fangyuan 2024-04-20 07:51:41 +08:00
parent a4cbecaab5
commit 1952e834d8
7 changed files with 183 additions and 0 deletions

1
.gitignore vendored
View File

@ -30,4 +30,5 @@ plugins/banwords/lib/__pycache__
!plugins/role
!plugins/keyword
!plugins/linkai
!plugins/jina_sum
client_config.json

2
plugins/jina_sum/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
__pycache__/
config.json

21
plugins/jina_sum/LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 Han Fangyuan
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,20 @@
# jina_sumary
ChatGPT on WeChat项目插件, 使用jina reader和ChatGPT总结网页链接内容
支持总结公众号、小红书、csdn等分享卡片链接(有的卡片链接会触发验证,一般直链没有此问题)
![wechat_mp](./docs/images/wechat_mp.jpg)
![red](./docs/images/red.jpg)
![csdn](./docs/images/csdn.jpg)
config.json 配置说明
```bash
{
"jina_reader_base": "https://r.jina.ai", # jina reader链接默认为https://r.jina.ai
"open_ai_api_base": "https://api.openai.com/v1", # chatgpt chat url
"open_ai_api_key": "sk-xxx", # chatgpt api key
"open_ai_model": "gpt-3.5-turbo", # chatgpt model
"max_words": 8000, # 网页链接内容的最大字数防止超过最大输入token使用字符串长度简单计数
"prompt": "我需要对下面的文本进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动。" # 链接内容总结提示词
}
```

View File

@ -0,0 +1 @@
from .jina_sum import *

View File

@ -0,0 +1,8 @@
{
"jina_reader_base": "https://r.jina.ai",
"open_ai_api_base": "https://api.openai.com/v1",
"open_ai_api_key": "sk-xxx",
"open_ai_model": "gpt-3.5-turbo",
"max_words": 8000,
"prompt": "我需要对下面的文本进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动。"
}

View File

@ -0,0 +1,130 @@
# encoding:utf-8
import json
import os
import html
from urllib.parse import urlparse
import requests
import plugins
from bridge.context import ContextType
from bridge.reply import Reply, ReplyType
from common.log import logger
from plugins import *
@plugins.register(
name="JinaSum",
desire_priority=10,
hidden=False,
desc="Sum url link content with jina reader and llm",
version="v0.0.1",
author="hanfangyuan",
)
class JinaSum(Plugin):
jina_reader_base = "https://r.jina.ai"
open_ai_api_base = "https://api.openai.com/v1"
open_ai_model = "gpt-3.5-turbo"
max_words = 8000
prompt = "我需要对下面引号内文档进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动\n\n"
def __init__(self):
super().__init__()
try:
self.config = super().load_config()
if not self.config:
self.config = self._load_config_template()
self.jina_reader_base = self.config.get("jina_reader_base", self.jina_reader_base)
self.open_ai_api_base = self.config.get("open_ai_api_base", self.open_ai_api_base)
self.open_ai_api_key = self.config.get("open_ai_api_key", "")
self.open_ai_model = self.config.get("open_ai_model", self.open_ai_model)
self.max_words = self.config.get("max_words", self.max_words)
self.prompt = self.config.get("prompt", self.prompt)
logger.info(f"[JinaSum] inited, config={self.config}")
self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
except Exception as e:
logger.error(f"[JinaSum] 初始化异常:{e}")
raise "[JinaSum] init failed, ignore "
def on_handle_context(self, e_context: EventContext, retry_count: int = 0):
try:
context = e_context["context"]
content = context.content
if context.type != ContextType.SHARING and context.type != ContextType.TEXT:
return
if not self._check_url(content):
logger.debug(f"[JinaSum] {content} not a url, skip")
return
if retry_count == 0:
logger.debug("[JinaSum] on_handle_context. content: %s" % content)
reply = Reply(ReplyType.TEXT, "🎉正在为您生成总结,请稍候...")
channel = e_context["channel"]
channel.send(reply, context)
target_url = html.unescape(content) # 解决公众号卡片链接校验问题,参考 https://github.com/fatwang2/sum4all/commit/b983c49473fc55f13ba2c44e4d8b226db3517c45
jina_url = self._get_jina_url(target_url)
response = requests.get(jina_url, timeout=60)
response.raise_for_status()
target_url_content = response.text
openai_chat_url = self._get_openai_chat_url()
openai_headers = self._get_openai_headers()
openai_payload = self._get_openai_payload(target_url_content)
logger.debug(f"[JinaSum] openai_chat_url: {openai_chat_url}, openai_headers: {openai_headers}, openai_payload: {openai_payload}")
response = requests.post(openai_chat_url, headers=openai_headers, json=openai_payload, timeout=60)
response.raise_for_status()
result = response.json()['choices'][0]['message']['content']
reply = Reply(ReplyType.TEXT, result)
e_context["reply"] = reply
e_context.action = EventAction.BREAK_PASS
except Exception as e:
if retry_count < 3:
logger.warning(f"[JinaSum] {str(e)}, retry {retry_count + 1}")
self.on_handle_context(e_context, retry_count + 1)
return
logger.exception(f"[JinaSum] {str(e)}")
reply = Reply(ReplyType.ERROR, "我暂时无法总结链接,请稍后再试")
e_context["reply"] = reply
e_context.action = EventAction.BREAK_PASS
def get_help_text(self, verbose, **kwargs):
return f'使用jina reader和ChatGPT总结网页链接内容'
def _load_config_template(self):
logger.debug("No Suno plugin config.json, use plugins/jina_sum/config.json.template")
try:
plugin_config_path = os.path.join(self.path, "config.json.template")
if os.path.exists(plugin_config_path):
with open(plugin_config_path, "r", encoding="utf-8") as f:
plugin_conf = json.load(f)
return plugin_conf
except Exception as e:
logger.exception(e)
def _get_jina_url(self, target_url):
return self.jina_reader_base + "/" + target_url
def _get_openai_chat_url(self):
return self.open_ai_api_base + "/chat/completions"
def _get_openai_headers(self):
return {
'Authorization': f"Bearer {self.open_ai_api_key}",
'Host': urlparse(self.open_ai_api_base).netloc
}
def _get_openai_payload(self, target_url_content):
target_url_content = target_url_content[:self.max_words] # 通过字符串长度简单进行截断
sum_prompt = f"{self.prompt}\n\n'''{target_url_content}'''"
messages = [{"role": "user", "content": sum_prompt}]
payload = {
'model': self.open_ai_model,
'messages': messages
}
return payload
def _check_url(self, target_url: str):
# 简单校验是否是url
return target_url.strip().startswith("http://") or target_url.strip().startswith("https://")