feature: optimize jina sum (#26)
* feat: JinaSum plugin supports black url list and white url list
* doc: update readme for update log
master
parent
af540b731d
commit
3271aca68e
|
|
@ -168,6 +168,7 @@ python3 app.py # windows环境下该命令通
|
|||
|
||||
|
||||
# 更新日志
|
||||
- 2024/04/30 支持windows环境下企业微信个人号
|
||||
- 2024/04/24 集成JinaSum插件,修复总结微信公众号文章,修复dify usage key error, 修复dify私有部署的图片url错误
|
||||
- 2024/04/16 支持基本的企业微信客服通道,感谢[**@lei195827**](https://github.com/lei195827), [**@sisuad**](https://github.com/sisuad) 的贡献
|
||||
- 2024/04/14 Suno音乐插件,Dify on WeChat对接详细教程,config文件bug修复
|
||||
|
|
|
|||
|
|
@ -8,13 +8,15 @@ ChatGPT on WeChat项目插件, 使用jina reader和ChatGPT总结网页链接内
|
|||

|
||||
|
||||
config.json 配置说明
|
||||
```bash
|
||||
```json
|
||||
{
|
||||
"jina_reader_base": "https://r.jina.ai", # jina reader链接,默认为https://r.jina.ai
|
||||
"open_ai_api_base": "https://api.openai.com/v1", # chatgpt chat url
|
||||
"open_ai_api_key": "sk-xxx", # chatgpt api key
|
||||
"open_ai_model": "gpt-3.5-turbo", # chatgpt model
|
||||
"max_words": 8000, # 网页链接内容的最大字数,防止超过最大输入token,使用字符串长度简单计数
|
||||
"white_url_list": [], # url白名单, 列表为空时不做限制,黑名单优先级大于白名单,即当一个url既在白名单又在黑名单时,黑名单生效
|
||||
"black_url_list": ["https://support.weixin.qq.com", "https://channels-aladin.wxqcloud.qq.com"], # url黑名单,排除不支持总结的视频号等链接
|
||||
"prompt": "我需要对下面的文本进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动。" # 链接内容总结提示词
|
||||
}
|
||||
```
|
||||
|
|
|
|||
|
|
@ -4,5 +4,7 @@
|
|||
"open_ai_api_key": "sk-xxx",
|
||||
"open_ai_model": "gpt-3.5-turbo",
|
||||
"max_words": 8000,
|
||||
"white_url_list": [],
|
||||
"black_url_list": ["https://support.weixin.qq.com", "https://channels-aladin.wxqcloud.qq.com"],
|
||||
"prompt": "我需要对下面的文本进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动。"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,11 @@ class JinaSum(Plugin):
|
|||
open_ai_model = "gpt-3.5-turbo"
|
||||
max_words = 8000
|
||||
prompt = "我需要对下面引号内文档进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动\n\n"
|
||||
white_url_list = []
|
||||
black_url_list = [
|
||||
"https://support.weixin.qq.com", # 视频号视频
|
||||
"https://channels-aladin.wxqcloud.qq.com", # 视频号音乐
|
||||
]
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
|
@ -40,6 +45,8 @@ class JinaSum(Plugin):
|
|||
self.open_ai_model = self.config.get("open_ai_model", self.open_ai_model)
|
||||
self.max_words = self.config.get("max_words", self.max_words)
|
||||
self.prompt = self.config.get("prompt", self.prompt)
|
||||
self.white_url_list = self.config.get("white_url_list", self.white_url_list)
|
||||
self.black_url_list = self.config.get("black_url_list", self.black_url_list)
|
||||
logger.info(f"[JinaSum] inited, config={self.config}")
|
||||
self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
|
||||
except Exception as e:
|
||||
|
|
@ -53,7 +60,7 @@ class JinaSum(Plugin):
|
|||
if context.type != ContextType.SHARING and context.type != ContextType.TEXT:
|
||||
return
|
||||
if not self._check_url(content):
|
||||
logger.debug(f"[JinaSum] {content} not a url, skip")
|
||||
logger.debug(f"[JinaSum] {content} is not a valid url, skip")
|
||||
return
|
||||
if retry_count == 0:
|
||||
logger.debug("[JinaSum] on_handle_context. content: %s" % content)
|
||||
|
|
@ -126,5 +133,19 @@ class JinaSum(Plugin):
|
|||
return payload
|
||||
|
||||
def _check_url(self, target_url: str):
|
||||
stripped_url = target_url.strip()
|
||||
# 简单校验是否是url
|
||||
return target_url.strip().startswith("http://") or target_url.strip().startswith("https://")
|
||||
if not stripped_url.startswith("http://") and not stripped_url.startswith("https://"):
|
||||
return False
|
||||
|
||||
# 检查白名单
|
||||
if len(self.white_url_list):
|
||||
if not any(stripped_url.startswith(white_url) for white_url in self.white_url_list):
|
||||
return False
|
||||
|
||||
# 排除黑名单,黑名单优先级>白名单
|
||||
for black_url in self.black_url_list:
|
||||
if stripped_url.startswith(black_url):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
|
|
|||
Loading…
Reference in New Issue