feature: optimize jina sum (#26)

* feat: JinaSum plugin supports black url list and white url list

* doc: update readme for update log
master
Han Fangyuan 2024-05-01 18:24:13 +08:00 committed by GitHub
parent af540b731d
commit 3271aca68e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 29 additions and 3 deletions

View File

@ -168,6 +168,7 @@ python3 app.py # windows环境下该命令通
# 更新日志
- 2024/04/30 支持windows环境下企业微信个人号
- 2024/04/24 集成JinaSum插件,修复总结微信公众号文章,修复dify usage key error,修复dify私有部署的图片url错误
- 2024/04/16 支持基本的企业微信客服通道,感谢[**@lei195827**](https://github.com/lei195827), [**@sisuad**](https://github.com/sisuad) 的贡献
- 2024/04/14 Suno音乐插件,Dify on WeChat对接详细教程,config文件bug修复

View File

@ -8,13 +8,15 @@ ChatGPT on WeChat项目插件, 使用jina reader和ChatGPT总结网页链接内
![csdn](./docs/images/csdn.jpg)
config.json 配置说明
```bash
```json
{
"jina_reader_base": "https://r.jina.ai", # jina reader链接,默认为https://r.jina.ai
"open_ai_api_base": "https://api.openai.com/v1", # chatgpt chat url
"open_ai_api_key": "sk-xxx", # chatgpt api key
"open_ai_model": "gpt-3.5-turbo", # chatgpt model
"max_words": 8000, # 网页链接内容的最大字数,防止超过最大输入token,使用字符串长度简单计数
"white_url_list": [], # url白名单,列表为空时不做限制;黑名单优先级大于白名单,即当一个url既在白名单又在黑名单时,黑名单生效
"black_url_list": ["https://support.weixin.qq.com", "https://channels-aladin.wxqcloud.qq.com"], # url黑名单,排除不支持总结的视频号等链接
"prompt": "我需要对下面的文本进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动。" # 链接内容总结提示词
}
```

View File

@ -4,5 +4,7 @@
"open_ai_api_key": "sk-xxx",
"open_ai_model": "gpt-3.5-turbo",
"max_words": 8000,
"white_url_list": [],
"black_url_list": ["https://support.weixin.qq.com", "https://channels-aladin.wxqcloud.qq.com"],
"prompt": "我需要对下面的文本进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动。"
}

View File

@ -27,6 +27,11 @@ class JinaSum(Plugin):
open_ai_model = "gpt-3.5-turbo"
max_words = 8000
prompt = "我需要对下面引号内文档进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动\n\n"
white_url_list = []
black_url_list = [
"https://support.weixin.qq.com", # 视频号视频
"https://channels-aladin.wxqcloud.qq.com", # 视频号音乐
]
def __init__(self):
super().__init__()
@ -40,6 +45,8 @@ class JinaSum(Plugin):
self.open_ai_model = self.config.get("open_ai_model", self.open_ai_model)
self.max_words = self.config.get("max_words", self.max_words)
self.prompt = self.config.get("prompt", self.prompt)
self.white_url_list = self.config.get("white_url_list", self.white_url_list)
self.black_url_list = self.config.get("black_url_list", self.black_url_list)
logger.info(f"[JinaSum] inited, config={self.config}")
self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
except Exception as e:
@ -53,7 +60,7 @@ class JinaSum(Plugin):
if context.type != ContextType.SHARING and context.type != ContextType.TEXT:
return
if not self._check_url(content):
logger.debug(f"[JinaSum] {content} not a url, skip")
logger.debug(f"[JinaSum] {content} is not a valid url, skip")
return
if retry_count == 0:
logger.debug("[JinaSum] on_handle_context. content: %s" % content)
@ -126,5 +133,19 @@ class JinaSum(Plugin):
return payload
def _check_url(self, target_url: str) -> bool:
    """Return whether ``target_url`` is a link this plugin should handle.

    The check is done on the text with surrounding whitespace stripped.
    A link is accepted only when all of the following hold:

    * it starts with ``http://`` or ``https://``;
    * ``self.white_url_list`` is empty, or the link starts with one of
      its entries;
    * the link does not start with any entry of ``self.black_url_list``.

    Both lists are matched as plain string prefixes. The blacklist is
    applied after the whitelist, so it wins when a link matches both.

    Args:
        target_url: Candidate text taken from the incoming message.

    Returns:
        ``True`` if the link passes every check, ``False`` otherwise.
    """
    stripped_url = target_url.strip()
    # Basic sanity check: only http(s) links are treated as URLs.
    if not stripped_url.startswith(("http://", "https://")):
        return False
    # str.startswith accepts a tuple of prefixes; ``or ()`` tolerates a
    # list that was set to null in the config.
    white_prefixes = tuple(self.white_url_list or ())
    black_prefixes = tuple(self.black_url_list or ())
    # An empty whitelist means "no restriction".
    if white_prefixes and not stripped_url.startswith(white_prefixes):
        return False
    # Blacklist takes priority over the whitelist; startswith(()) is False,
    # so an empty blacklist rejects nothing.
    return not stripped_url.startswith(black_prefixes)