[M6] Conan/vcpkg fuzzy match (Levenshtein); cmake -P CMAKE_ROOT lookup
This commit is contained in:
@@ -60,6 +60,7 @@ target_sources(cargoxx
|
||||
../src/resolver/conan_probe.cpp
|
||||
../src/resolver/discover.cpp
|
||||
../src/resolver/findmodule_scan.cpp
|
||||
../src/resolver/fuzzy_listing.cpp
|
||||
../src/resolver/nix_cmake_scan.cpp
|
||||
../src/resolver/nixpkgs_git.cpp
|
||||
../src/resolver/nixpkgs_probe.cpp
|
||||
@@ -69,6 +70,7 @@ target_sources(cargoxx
|
||||
../src/resolver/verify_link.cpp
|
||||
../src/resolver/version_resolve.cpp
|
||||
../src/util/error.cpp
|
||||
../src/util/levenshtein.cpp
|
||||
../src/util/semver.cpp
|
||||
)
|
||||
target_include_directories(cargoxx SYSTEM PRIVATE ../third_party)
|
||||
|
||||
@@ -124,10 +124,10 @@ auto discover(const std::string& name, const std::string& version_spec,
|
||||
|
||||
std::vector<Candidate> candidates;
|
||||
|
||||
if (auto c = conan_probe(name); c) {
|
||||
if (auto c = conan_probe_fuzzy(name); c) {
|
||||
candidates.push_back({"conan", recipe_from_conan(*c, name, "conan")});
|
||||
}
|
||||
if (auto v = vcpkg_probe(name); v) {
|
||||
if (auto v = vcpkg_probe_fuzzy(name); v) {
|
||||
candidates.push_back({"vcpkg", recipe_from_vcpkg(*v, name, "vcpkg")});
|
||||
}
|
||||
// Multi-output nix packages keep CMake configs in the `dev` output.
|
||||
|
||||
@@ -67,36 +67,44 @@ auto match_score(std::string_view stem, std::string_view pkg) -> int {
|
||||
return 2;
|
||||
}
|
||||
|
||||
// Find CMake's bundled Modules dir. `cmake -E capabilities` emits JSON
|
||||
// with a `cmakeRoot` field; modules live at `${cmakeRoot}/Modules/`.
|
||||
// We parse the value with a tiny string search rather than dragging
|
||||
// nlohmann::json through this module — the field's value is always a
|
||||
// quoted string immediately after the literal `"cmakeRoot":`.
|
||||
// Find CMake's bundled Modules dir by running a one-line script that
|
||||
// prints `CMAKE_ROOT`. We can't use `cmake -E capabilities` because
|
||||
// CMake 4.x dropped the `cmakeRoot` field; `cmake -P` of a script with
|
||||
// `message("${CMAKE_ROOT}")` is the portable path. The message text
|
||||
// goes to stderr in `cmake -P` mode.
|
||||
auto find_modules_dir() -> std::optional<fs::path> {
|
||||
auto r = exec::run("cmake",
|
||||
{"-E", "capabilities"},
|
||||
auto script = fs::temp_directory_path() /
|
||||
std::format("cargoxx-findroot-{}.cmake",
|
||||
std::random_device{}());
|
||||
{
|
||||
std::ofstream out{script};
|
||||
if (!out) {
|
||||
return std::nullopt;
|
||||
}
|
||||
out << "message(\"${CMAKE_ROOT}\")\n";
|
||||
}
|
||||
auto r = exec::run("cmake", {"-P", script.string()},
|
||||
exec::ExecOptions{
|
||||
.cwd = fs::current_path(),
|
||||
.env_overrides = {},
|
||||
.timeout = std::chrono::seconds{5},
|
||||
.inherit_stdio = false,
|
||||
});
|
||||
std::error_code ec;
|
||||
fs::remove(script, ec);
|
||||
if (!r || r->exit_code != 0) {
|
||||
return std::nullopt;
|
||||
}
|
||||
std::string_view body = r->stdout_text;
|
||||
constexpr std::string_view key = "\"cmakeRoot\":\"";
|
||||
auto pos = body.find(key);
|
||||
if (pos == std::string_view::npos) {
|
||||
// The message goes to stderr in script mode; trim and use it.
|
||||
std::string_view body = r->stderr_text;
|
||||
while (!body.empty() && (body.back() == '\n' || body.back() == '\r' ||
|
||||
body.back() == ' ' || body.back() == '\t')) {
|
||||
body.remove_suffix(1);
|
||||
}
|
||||
if (body.empty()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
pos += key.size();
|
||||
auto end = body.find('"', pos);
|
||||
if (end == std::string_view::npos) {
|
||||
return std::nullopt;
|
||||
}
|
||||
fs::path modules = fs::path{std::string{body.substr(pos, end - pos)}} / "Modules";
|
||||
std::error_code ec;
|
||||
fs::path modules = fs::path{std::string{body}} / "Modules";
|
||||
if (!fs::exists(modules, ec) || ec) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
193
src/resolver/fuzzy_listing.cpp
Normal file
193
src/resolver/fuzzy_listing.cpp
Normal file
@@ -0,0 +1,193 @@
|
||||
module;
|
||||
|
||||
#include <json.hpp>
|
||||
|
||||
module cargoxx.resolver;
|
||||
|
||||
import std;
|
||||
import cargoxx.exec;
|
||||
import cargoxx.util;
|
||||
|
||||
namespace cargoxx::resolver {
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace {
|
||||
|
||||
// Builds a util::Error tagged `ResolutionNetworkError` that carries `msg`.
// The three trailing Error fields are filled with an empty string and two
// empty optionals.
auto network_error(std::string msg) -> util::Error {
    util::Error err{util::ErrorCode::ResolutionNetworkError, std::move(msg),
                    "", std::nullopt, std::nullopt};
    return err;
}
|
||||
|
||||
// Downloads `url` with curl and returns every string-valued `path` found in
// the entries of the response's top-level `tree` array, in response order.
//
// Failure modes:
//   - `exec::run` itself fails      -> that error is forwarded unchanged.
//   - curl exits non-zero           -> ResolutionNetworkError (includes stderr).
//   - body is not valid JSON        -> ResolutionNetworkError.
//   - no `tree` array in the body   -> ResolutionNetworkError.
// Entries without a string `path` are silently skipped.
auto fetch_tree_paths(const std::string& url) -> util::Result<std::vector<std::string>> {
    // curl gets its own 20 s budget; the process-level timeout is 30 s.
    auto fetched = exec::run("curl", {"-fsSL", "--max-time", "20", url},
                             exec::ExecOptions{
                                 .cwd = {},
                                 .env_overrides = {},
                                 .timeout = std::chrono::seconds{30},
                                 .inherit_stdio = false,
                             });
    if (!fetched) {
        return std::unexpected(fetched.error());
    }
    if (fetched->exit_code != 0) {
        auto detail = std::format("curl failed (exit {}): {}", fetched->exit_code,
                                  fetched->stderr_text);
        return std::unexpected(network_error(std::move(detail)));
    }

    nlohmann::json doc;
    try {
        doc = nlohmann::json::parse(fetched->stdout_text);
    } catch (const nlohmann::json::parse_error& e) {
        return std::unexpected(
            network_error(std::format("tree listing not valid JSON: {}", e.what())));
    }

    const bool has_tree = doc.contains("tree") && doc.at("tree").is_array();
    if (!has_tree) {
        return std::unexpected(network_error("tree listing missing 'tree' array"));
    }

    std::vector<std::string> paths;
    for (const auto& node : doc.at("tree")) {
        if (!node.contains("path") || !node.at("path").is_string()) {
            continue;
        }
        paths.push_back(node.at("path").get<std::string>());
    }
    return paths;
}
|
||||
|
||||
// Picks the directory cargoxx caches downloaded indexes in.
//
// Resolution order:
//   1. `$XDG_CACHE_HOME/cargoxx` when the variable is set, non-empty and
//      absolute. The XDG Base Directory spec requires relative values to be
//      treated as invalid and ignored, so they are skipped here rather than
//      resolved against the current working directory.
//   2. `$HOME/.cache/cargoxx` when `HOME` is set and non-empty.
//   3. `<system temp dir>/cargoxx` otherwise.
//
// The directory is not created here. `fs::temp_directory_path()` is the
// throwing overload, so step 3 can raise `fs::filesystem_error`.
auto cache_root() -> fs::path {
    if (const char* xdg = std::getenv("XDG_CACHE_HOME"); xdg != nullptr && *xdg != '\0') {
        const fs::path base{xdg};
        if (base.is_absolute()) {
            return base / "cargoxx";
        }
    }
    if (const char* home = std::getenv("HOME"); home != nullptr && *home != '\0') {
        return fs::path{home} / ".cache" / "cargoxx";
    }
    return fs::temp_directory_path() / "cargoxx";
}
|
||||
|
||||
// Maximum age of a cached index file, measured from its last-write time,
// for `load_or_fetch` to reuse it instead of downloading a fresh listing.
constexpr auto INDEX_TTL = std::chrono::hours{24};
|
||||
|
||||
// Returns the package-name index identified by `cache_key`, preferring an
// on-disk copy over the network.
//
// The cache file is `<cache_root()>/<cache_key>-index.txt`, one entry per
// line. It is used when it is younger than INDEX_TTL (by last-write time) and
// yields at least one non-empty line; otherwise the listing is fetched from
// `url` via `fetch_tree_paths` and the cache file is rewritten.
//
// Only a failed fetch is reported as an error. Every cache problem (missing
// file, unreadable mtime, unwritable directory) is deliberately best-effort
// and simply degrades to "fetch again next time".
auto load_or_fetch(const std::string& cache_key, const std::string& url)
    -> util::Result<std::vector<std::string>> {
    const auto path = cache_root() / std::format("{}-index.txt", cache_key);
    std::error_code ec;

    // Non-throwing overload: a missing file, or one that disappears or becomes
    // unreadable between calls, sets `ec` and falls through to the refetch
    // instead of escaping as an exception from a Result-returning function.
    const auto written_at = fs::last_write_time(path, ec);
    if (!ec) {
        const auto age = std::chrono::system_clock::now() -
                         std::chrono::file_clock::to_sys(written_at);
        if (age < INDEX_TTL) {
            if (std::ifstream in{path}; in) {
                std::vector<std::string> cached;
                for (std::string line; std::getline(in, line);) {
                    if (!line.empty()) {
                        cached.push_back(std::move(line));
                    }
                }
                // An empty cache file is treated as stale, not as "no packages".
                if (!cached.empty()) {
                    return cached;
                }
            }
        }
    }

    auto fresh = fetch_tree_paths(url);
    if (!fresh) {
        return std::unexpected(fresh.error());
    }

    // Best-effort cache write; failures are ignored on purpose.
    fs::create_directories(path.parent_path(), ec);
    if (std::ofstream out{path}; out) {
        for (const auto& entry : *fresh) {
            out << entry << '\n';
        }
    }
    return fresh;
}
|
||||
|
||||
// Levenshtein top-k filter with a max-distance gate of ⌈len/4⌉ (min 1).
|
||||
auto top_fuzzy(std::string_view query, const std::vector<std::string>& corpus,
|
||||
std::size_t k) -> std::vector<std::string> {
|
||||
const std::size_t cap = std::max<std::size_t>(1, (query.size() + 3) / 4);
|
||||
struct Scored {
|
||||
std::size_t dist;
|
||||
std::string name;
|
||||
};
|
||||
std::vector<Scored> scored;
|
||||
scored.reserve(corpus.size());
|
||||
for (const auto& c : corpus) {
|
||||
auto d = util::levenshtein(query, c);
|
||||
if (d <= cap) {
|
||||
scored.push_back({d, c});
|
||||
}
|
||||
}
|
||||
std::ranges::sort(scored, [](const auto& a, const auto& b) {
|
||||
if (a.dist != b.dist) {
|
||||
return a.dist < b.dist;
|
||||
}
|
||||
return a.name < b.name;
|
||||
});
|
||||
std::vector<std::string> out;
|
||||
for (std::size_t i = 0; i < std::min(k, scored.size()); ++i) {
|
||||
out.push_back(std::move(scored[i].name));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
// Number of nearest index entries the `*_probe_fuzzy` functions request from
// `top_fuzzy` and then probe in order; also quoted in their error messages.
constexpr auto FUZZY_K = std::size_t{3};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Resolves `name` to a Conan recipe, tolerating small spelling differences.
//
// Steps:
//   1. Probe `name` verbatim; a success is returned immediately.
//   2. Load the conan-center-index `recipes` listing (cached via
//      `load_or_fetch`) and take the FUZZY_K nearest names from `top_fuzzy`.
//   3. Probe those names in order and return the first success.
//
// Every failure path reports ResolutionUnknownPackage.
// NOTE(review): any failure of the exact probe leads to the fuzzy path, and
// the index-fetch error is discarded, so transport problems surface as
// "unknown package" — confirm that is intended.
auto conan_probe_fuzzy(const std::string& name) -> util::Result<ConanRecipe> {
    const auto unknown_package = [](std::string msg) {
        return std::unexpected(util::Error{
            util::ErrorCode::ResolutionUnknownPackage,
            std::move(msg),
            "", std::nullopt, std::nullopt,
        });
    };

    auto direct = conan_probe(name);
    if (direct) {
        return direct;
    }

    auto listing = load_or_fetch(
        "conan",
        "https://api.github.com/repos/conan-io/conan-center-index/git/trees/master:recipes");
    if (!listing) {
        return unknown_package(
            std::format("no Conan recipe for '{}' and index fetch failed", name));
    }

    for (const auto& near_name : top_fuzzy(name, *listing, FUZZY_K)) {
        auto hit = conan_probe(near_name);
        if (hit) {
            return hit;
        }
    }
    return unknown_package(
        std::format("no Conan recipe matches '{}' (tried exact + fuzzy top-{})",
                    name, FUZZY_K));
}
|
||||
|
||||
// Resolves `name` to a vcpkg port, tolerating small spelling differences.
//
// Steps:
//   1. Probe `name` verbatim; a success is returned immediately.
//   2. Load the microsoft/vcpkg `ports` listing (cached via `load_or_fetch`)
//      and take the FUZZY_K nearest names from `top_fuzzy`.
//   3. Probe those names in order and return the first success.
//
// Every failure path reports ResolutionUnknownPackage.
// NOTE(review): any failure of the exact probe leads to the fuzzy path, and
// the index-fetch error is discarded, so transport problems surface as
// "unknown package" — confirm that is intended.
auto vcpkg_probe_fuzzy(const std::string& name) -> util::Result<VcpkgRecipe> {
    const auto unknown_package = [](std::string msg) {
        return std::unexpected(util::Error{
            util::ErrorCode::ResolutionUnknownPackage,
            std::move(msg),
            "", std::nullopt, std::nullopt,
        });
    };

    auto direct = vcpkg_probe(name);
    if (direct) {
        return direct;
    }

    auto listing = load_or_fetch(
        "vcpkg",
        "https://api.github.com/repos/microsoft/vcpkg/git/trees/master:ports");
    if (!listing) {
        return unknown_package(
            std::format("no vcpkg port for '{}' and index fetch failed", name));
    }

    for (const auto& near_name : top_fuzzy(name, *listing, FUZZY_K)) {
        auto hit = vcpkg_probe(near_name);
        if (hit) {
            return hit;
        }
    }
    return unknown_package(
        std::format("no vcpkg port matches '{}' (tried exact + fuzzy top-{})",
                    name, FUZZY_K));
}
|
||||
|
||||
} // namespace cargoxx::resolver
|
||||
@@ -130,6 +130,13 @@ auto parse_conanfile(std::string_view conanfile_text, const std::string& fallbac
|
||||
// ResolutionNetworkError.
|
||||
auto conan_probe(const std::string& name) -> util::Result<ConanRecipe>;
|
||||
|
||||
// Like `conan_probe`, but on exact-name miss falls back to a fuzzy
|
||||
// match against the conan-center-index recipe listing using
|
||||
// Levenshtein distance ≤ ⌈len/4⌉. Returns the first fuzzy candidate
|
||||
// whose conanfile.py parses cleanly. Internal-only: the user's
|
||||
// originally-typed package name is preserved by the caller.
|
||||
auto conan_probe_fuzzy(const std::string& name) -> util::Result<ConanRecipe>;
|
||||
|
||||
// Output of a microsoft/vcpkg port usage-file scrape.
|
||||
struct VcpkgRecipe {
|
||||
std::string find_package; // e.g. "fmt CONFIG REQUIRED"
|
||||
@@ -148,6 +155,10 @@ auto parse_vcpkg_usage(std::string_view usage_text)
|
||||
// ResolutionUnknownPackage; transport errors → ResolutionNetworkError.
|
||||
auto vcpkg_probe(const std::string& name) -> util::Result<VcpkgRecipe>;
|
||||
|
||||
// Like `vcpkg_probe`, but on exact-name miss falls back to fuzzy
|
||||
// matching against the vcpkg/ports listing (Levenshtein ≤ ⌈len/4⌉).
|
||||
auto vcpkg_probe_fuzzy(const std::string& name) -> util::Result<VcpkgRecipe>;
|
||||
|
||||
// Caller-supplied closure that runs `cargoxx build` (or any equivalent
|
||||
// build) on a project rooted at the given path. Injected so the resolver
|
||||
// stays decoupled from `cargoxx.cli`.
|
||||
|
||||
30
src/util/levenshtein.cpp
Normal file
30
src/util/levenshtein.cpp
Normal file
@@ -0,0 +1,30 @@
|
||||
module cargoxx.util;
|
||||
|
||||
import std;
|
||||
|
||||
namespace cargoxx::util {
|
||||
|
||||
// Classic Levenshtein edit distance between `a` and `b`: the minimum number
// of single-character insertions, deletions and substitutions that turn one
// into the other. Comparison is byte-wise and case-sensitive.
//
// Implementation: one rolling DP row sized to the shorter string, so the
// function performs a single allocation of min(|a|, |b|) + 1 entries and
// none at all when either input is empty. Time is O(|a| * |b|).
auto levenshtein(std::string_view a, std::string_view b) -> std::size_t {
    // The distance is symmetric; keep `b` as the shorter string so the row
    // is as small as possible.
    if (a.size() < b.size()) {
        std::swap(a, b);
    }
    if (b.empty()) {
        return a.size();  // nothing to match: insert every character of `a`
    }

    // row[j] holds D[i-1][j] before column j is processed in iteration i and
    // D[i][j] afterwards.
    std::vector<std::size_t> row(b.size() + 1);
    std::iota(row.begin(), row.end(), std::size_t{0});

    for (std::size_t i = 1; i <= a.size(); ++i) {
        std::size_t diagonal = row[0];  // D[i-1][j-1] for j == 1
        row[0] = i;
        for (std::size_t j = 1; j <= b.size(); ++j) {
            const std::size_t above = row[j];  // D[i-1][j], about to be overwritten
            const std::size_t substitution =
                diagonal + ((a[i - 1] == b[j - 1]) ? std::size_t{0} : std::size_t{1});
            row[j] = std::min({above + 1, row[j - 1] + 1, substitution});
            diagonal = above;
        }
    }
    return row[b.size()];
}
|
||||
|
||||
} // namespace cargoxx::util
|
||||
@@ -47,6 +47,11 @@ using Result = std::expected<T, Error>;
|
||||
|
||||
auto format(const Error& e) -> std::string;
|
||||
|
||||
// Classic Levenshtein edit distance. Used by the resolver's
|
||||
// Conan/vcpkg fuzzy match when the user's nixpkgs name doesn't appear
|
||||
// verbatim in those repositories' indexes (e.g. `sqlite` ↔ `sqlite3`).
|
||||
auto levenshtein(std::string_view a, std::string_view b) -> std::size_t;
|
||||
|
||||
// Returns true if `version` (e.g. "10.2", "1.84.0") satisfies `range`.
|
||||
//
|
||||
// Supported range syntax:
|
||||
|
||||
Reference in New Issue
Block a user