feat: fetch tool (#1196)

This commit is contained in:
yetone 2025-02-06 19:13:47 +08:00 committed by GitHub
parent 77e20fd088
commit 1ec12907a2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 1037 additions and 11 deletions

864
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -12,6 +12,7 @@ version = "0.1.0"
avante-tokenizers = { path = "crates/avante-tokenizers" }
avante-templates = { path = "crates/avante-templates" }
avante-repo-map = { path = "crates/avante-repo-map" }
avante-html2md = { path = "crates/avante-html2md" }
minijinja = { version = "2.4.0", features = [
"loader",
"json",

View File

@ -22,15 +22,17 @@ all: luajit
define make_definitions
ifeq ($(BUILD_FROM_SOURCE),true)
ifeq ($(TARGET_LIBRARY), all)
$1: $(BUILD_DIR)/libAvanteTokenizers-$1.$(EXT) $(BUILD_DIR)/libAvanteTemplates-$1.$(EXT) $(BUILD_DIR)/libAvanteRepoMap-$1.$(EXT)
$1: $(BUILD_DIR)/libAvanteTokenizers-$1.$(EXT) $(BUILD_DIR)/libAvanteTemplates-$1.$(EXT) $(BUILD_DIR)/libAvanteRepoMap-$1.$(EXT) $(BUILD_DIR)/libAvanteHtml2md-$1.$(EXT)
else ifeq ($(TARGET_LIBRARY), tokenizers)
$1: $(BUILD_DIR)/libAvanteTokenizers-$1.$(EXT)
else ifeq ($(TARGET_LIBRARY), templates)
$1: $(BUILD_DIR)/libAvanteTemplates-$1.$(EXT)
else ifeq ($(TARGET_LIBRARY), repo-map)
$1: $(BUILD_DIR)/libAvanteRepoMap-$1.$(EXT)
else ifeq ($(TARGET_LIBRARY), html2md)
$1: $(BUILD_DIR)/libAvanteHtml2md-$1.$(EXT)
else
$$(error TARGET_LIBRARY must be one of all, tokenizers, templates, repo-map)
$$(error TARGET_LIBRARY must be one of all, tokenizers, templates, repo-map, html2md)
endif
else
$1:
@ -50,11 +52,13 @@ define build_targets
$(BUILD_DIR)/libAvanteTokenizers-$1.$(EXT): $(BUILD_DIR) $1-tokenizers
$(BUILD_DIR)/libAvanteTemplates-$1.$(EXT): $(BUILD_DIR) $1-templates
$(BUILD_DIR)/libAvanteRepoMap-$1.$(EXT): $(BUILD_DIR) $1-repo-map
$(BUILD_DIR)/libAvanteHtml2md-$1.$(EXT): $(BUILD_DIR) $1-html2md
endef
$(foreach lua_version,$(LUA_VERSIONS),$(eval $(call build_package,$(lua_version),tokenizers)))
$(foreach lua_version,$(LUA_VERSIONS),$(eval $(call build_package,$(lua_version),templates)))
$(foreach lua_version,$(LUA_VERSIONS),$(eval $(call build_package,$(lua_version),repo-map)))
$(foreach lua_version,$(LUA_VERSIONS),$(eval $(call build_package,$(lua_version),html2md)))
$(foreach lua_version,$(LUA_VERSIONS),$(eval $(call build_targets,$(lua_version))))
$(BUILD_DIR):

View File

@ -0,0 +1,25 @@
[lib]
crate-type = ["cdylib"]
[package]
name = "avante-html2md"
edition.workspace = true
rust-version.workspace = true
license.workspace = true
version.workspace = true
[dependencies]
htmd = "0.1.6"
# NOTE(review): the crate source shown in this change only imports `htmd`;
# `html2md` looks unused — confirm and drop it if nothing else references it.
html2md = "0.2.15"
mlua.workspace = true
# The "blocking" feature is needed for `reqwest::blocking::Client`.
reqwest = { version = "0.12.12", features = ["blocking"] }
[lints]
workspace = true
[features]
lua51 = ["mlua/lua51"]
lua52 = ["mlua/lua52"]
lua53 = ["mlua/lua53"]
lua54 = ["mlua/lua54"]
luajit = ["mlua/luajit"]

View File

@ -0,0 +1,81 @@
use htmd::HtmlToMarkdown;
use mlua::prelude::*;
use std::error::Error;
/// Failure categories of this crate. Each variant carries the textual
/// description of the underlying error.
#[derive(Debug)]
enum MyError {
    /// Converting HTML to Markdown failed.
    HtmlToMd(String),
    /// Something went wrong while performing the HTTP request.
    Request(String),
}

impl std::fmt::Display for MyError {
    /// Renders the error as `"<category> error: <details>"`.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::HtmlToMd(e) => write!(formatter, "HTML to Markdown error: {e}"),
            Self::Request(e) => write!(formatter, "Request error: {e}"),
        }
    }
}

// The default `Error` methods are sufficient; there is no wrapped source error.
impl Error for MyError {}
/// Converts an HTML document into Markdown.
///
/// The converter is configured to skip `script`, `style`, `header` and
/// `footer` tags. Conversion failures are reported as `MyError::HtmlToMd`.
fn do_html2md(html: &str) -> Result<String, MyError> {
    let skipped_tags = vec!["script", "style", "header", "footer"];
    HtmlToMarkdown::builder()
        .skip_tags(skipped_tags)
        .build()
        .convert(html)
        .map_err(|err| MyError::HtmlToMd(err.to_string()))
}
/// Downloads `url` with a blocking HTTP client and converts the response
/// body from HTML to Markdown.
///
/// A browser-like `User-Agent` header is sent with the request.
///
/// # Errors
///
/// Returns `MyError::Request` when the client cannot be built, the request
/// fails, the server answers with a 4xx/5xx status, or the body cannot be
/// read; returns `MyError::HtmlToMd` when the conversion fails.
fn do_fetch_md(url: &str) -> Result<String, MyError> {
    let mut headers = reqwest::header::HeaderMap::new();
    headers.insert(
        reqwest::header::USER_AGENT,
        reqwest::header::HeaderValue::from_static("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"),
    );
    let client = reqwest::blocking::Client::builder()
        .default_headers(headers)
        .build()
        .map_err(|e| MyError::Request(e.to_string()))?;
    let response = client
        .get(url)
        .send()
        // Treat HTTP error statuses as failures instead of converting the
        // server's error page into Markdown and returning it as content.
        .and_then(|response| response.error_for_status())
        .map_err(|e| MyError::Request(e.to_string()))?;
    let body = response
        .text()
        .map_err(|e| MyError::Request(e.to_string()))?;
    // `trim` borrows from `body`, so no extra copy of the page is needed.
    do_html2md(body.trim())
}
#[mlua::lua_module]
fn avante_html2md(lua: &Lua) -> LuaResult<LuaTable> {
let exports = lua.create_table()?;
exports.set(
"fetch_md",
lua.create_function(move |_, url: String| -> LuaResult<String> {
do_fetch_md(&url).map_err(|e| mlua::Error::RuntimeError(e.to_string()))
})?,
)?;
exports.set(
"html2md",
lua.create_function(move |_, html: String| -> LuaResult<String> {
do_html2md(&html).map_err(|e| mlua::Error::RuntimeError(e.to_string()))
})?,
)?;
Ok(exports)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Hermetic check of the conversion path: visible text is kept and the
    /// contents of skipped tags are dropped.
    #[test]
    fn test_html2md() {
        let md = do_html2md("<h1>Title</h1><script>alert(1)</script><p>Hello</p>").unwrap();
        assert!(md.contains("Title"));
        assert!(md.contains("Hello"));
        assert!(!md.contains("alert"));
    }

    /// Smoke test against a live site. Ignored by default because it needs
    /// network access; run it with `cargo test -- --ignored`.
    #[test]
    #[ignore = "requires network access"]
    fn test_fetch_md() {
        let md = do_fetch_md("https://github.com/yetone/avante.nvim").unwrap();
        println!("{md}");
    }
}

27
lua/avante/html2md.lua Normal file
View File

@ -0,0 +1,27 @@
---Functions exported by the native `avante_html2md` module.
---@class AvanteHtml2Md
---@field fetch_md fun(url: string): string
---@field html2md fun(html: string): string
local _html2md_lib = nil -- cached native module; stays nil until a load succeeds
local M = {}
---Loads the native `avante_html2md` module on first use and caches it.
---A failed `require` is not cached, so the next call tries again.
---@return AvanteHtml2Md|nil lib the native module, or nil when it cannot be loaded
function M._init_html2md_lib()
  if _html2md_lib == nil then
    local loaded, lib = pcall(require, "avante_html2md")
    if not loaded then return nil end
    _html2md_lib = lib
  end
  return _html2md_lib
end
---Schedules loading of the native library 1000 ms from now via `vim.defer_fn`.
function M.setup()
  local delay_ms = 1000
  vim.defer_fn(M._init_html2md_lib, delay_ms)
end
---Fetches `url` through the native library and returns the page as markdown.
---Failures are reported as `nil, message` instead of being raised.
---@param url string
---@return string|nil markdown
---@return string|nil error
function M.fetch_md(url)
  local html2md_lib = M._init_html2md_lib()
  -- Return nil (not "") so callers that test the result for nil see the failure.
  if not html2md_lib then return nil, "Failed to load avante_html2md" end
  -- The native function raises a Lua error on request/conversion failure;
  -- trap it so the caller gets a regular error value.
  local ok, res = pcall(html2md_lib.fetch_md, url)
  if not ok then return nil, tostring(res) end
  return res, nil
end
return M

View File

@ -360,6 +360,7 @@ function M.setup(opts)
H.load_path()
require("avante.html2md").setup()
require("avante.repo_map").setup()
require("avante.path").setup()
require("avante.highlights").setup()

View File

@ -308,6 +308,18 @@ function M.web_search(opts, on_log)
end
end
---Fetches the page at `opts.url` and returns it converted to markdown.
---@param opts { url: string }
---@param on_log? fun(log: string): nil
---@return string|nil result
---@return string|nil error
function M.fetch(opts, on_log)
  if on_log then on_log("url: " .. opts.url) end
  local Html2Md = require("avante.html2md")
  -- pcall guards against errors raised by the underlying library, so a failed
  -- fetch is returned as an error value rather than propagating upwards.
  local ok, res, err = pcall(Html2Md.fetch_md, opts.url)
  if not ok then return nil, tostring(res) end
  -- Forward the error reported by fetch_md instead of discarding it.
  if err then return nil, err end
  if res == nil then return nil, "Failed to fetch markdown" end
  return res, nil
end
---@class AvanteLLMTool
---@field name string
---@field description string
@ -715,6 +727,33 @@ M.tools = {
},
},
},
-- Descriptor of the "fetch" tool: it takes a single `url` string and reports
-- a `result` string plus an optional `error` string.
-- NOTE(review): presumably dispatched to M.fetch by tool name — confirm.
{
name = "fetch",
description = "Fetch markdown from a url",
param = {
type = "table",
fields = {
{
name = "url",
description = "Url to fetch markdown from",
type = "string",
},
},
},
returns = {
{
name = "result",
description = "Result of the fetch",
type = "string",
},
{
name = "error",
description = "Error message if the fetch was not successful",
type = "string",
optional = true,
},
},
},
}
---@param tools AvanteLLMTool[]

View File

@ -13,7 +13,6 @@ local filetype_map = {
---@field stringify_definitions fun(lang: string, source: string): string
local repo_map_lib = nil
---@class avante.utils.repo_map
local RepoMap = {}
---@return AvanteRepoMap|nil

View File

@ -7,7 +7,6 @@ local lsp = vim.lsp
---@class avante.utils: LazyUtilCore
---@field tokens avante.utils.tokens
---@field root avante.utils.root
---@field repo_map avante.utils.repo_map
---@field file avante.utils.file
local M = {}