[M6] Conan/vcpkg fuzzy match (Levenshtein); cmake -P CMAKE_ROOT lookup

This commit is contained in:
2026-05-15 14:08:49 +00:00
parent 8bbfcf7657
commit 01b3c28d6c
7 changed files with 269 additions and 20 deletions

View File

@@ -124,10 +124,10 @@ auto discover(const std::string& name, const std::string& version_spec,
std::vector<Candidate> candidates;
if (auto c = conan_probe(name); c) {
if (auto c = conan_probe_fuzzy(name); c) {
candidates.push_back({"conan", recipe_from_conan(*c, name, "conan")});
}
if (auto v = vcpkg_probe(name); v) {
if (auto v = vcpkg_probe_fuzzy(name); v) {
candidates.push_back({"vcpkg", recipe_from_vcpkg(*v, name, "vcpkg")});
}
// Multi-output nix packages keep CMake configs in the `dev` output.

View File

@@ -67,36 +67,44 @@ auto match_score(std::string_view stem, std::string_view pkg) -> int {
return 2;
}
// Find CMake's bundled Modules dir. `cmake -E capabilities` emits JSON
// with a `cmakeRoot` field; modules live at `${cmakeRoot}/Modules/`.
// We parse the value with a tiny string search rather than dragging
// nlohmann::json through this module — the field's value is always a
// quoted string immediately after the literal `"cmakeRoot":`.
// Find CMake's bundled Modules dir by running a one-line script that
// prints `CMAKE_ROOT`. We can't use `cmake -E capabilities` because
// CMake 4.x dropped the `cmakeRoot` field; `cmake -P` of a script with
// `message("${CMAKE_ROOT}")` is the portable path. The message text
// goes to stderr in `cmake -P` mode.
auto find_modules_dir() -> std::optional<fs::path> {
auto r = exec::run("cmake",
{"-E", "capabilities"},
auto script = fs::temp_directory_path() /
std::format("cargoxx-findroot-{}.cmake",
std::random_device{}());
{
std::ofstream out{script};
if (!out) {
return std::nullopt;
}
out << "message(\"${CMAKE_ROOT}\")\n";
}
auto r = exec::run("cmake", {"-P", script.string()},
exec::ExecOptions{
.cwd = fs::current_path(),
.env_overrides = {},
.timeout = std::chrono::seconds{5},
.inherit_stdio = false,
});
std::error_code ec;
fs::remove(script, ec);
if (!r || r->exit_code != 0) {
return std::nullopt;
}
std::string_view body = r->stdout_text;
constexpr std::string_view key = "\"cmakeRoot\":\"";
auto pos = body.find(key);
if (pos == std::string_view::npos) {
// The message goes to stderr in script mode; trim and use it.
std::string_view body = r->stderr_text;
while (!body.empty() && (body.back() == '\n' || body.back() == '\r' ||
body.back() == ' ' || body.back() == '\t')) {
body.remove_suffix(1);
}
if (body.empty()) {
return std::nullopt;
}
pos += key.size();
auto end = body.find('"', pos);
if (end == std::string_view::npos) {
return std::nullopt;
}
fs::path modules = fs::path{std::string{body.substr(pos, end - pos)}} / "Modules";
std::error_code ec;
fs::path modules = fs::path{std::string{body}} / "Modules";
if (!fs::exists(modules, ec) || ec) {
return std::nullopt;
}

View File

@@ -0,0 +1,193 @@
module;
#include <json.hpp>
module cargoxx.resolver;
import std;
import cargoxx.exec;
import cargoxx.util;
namespace cargoxx::resolver {
namespace fs = std::filesystem;
namespace {
// Builds a util::Error tagged ResolutionNetworkError that carries `msg`;
// the remaining Error fields are left empty.
auto network_error(std::string msg) -> util::Error {
    util::Error err{
        util::ErrorCode::ResolutionNetworkError,
        std::move(msg),
        "",
        std::nullopt,
        std::nullopt,
    };
    return err;
}
// Downloads `url` with curl and collects the `path` string of every entry
// in the response's top-level `tree` array.
//
// Returns the exec error unchanged if curl could not be run, and a
// ResolutionNetworkError if curl exits non-zero, the body is not valid
// JSON, or there is no `tree` array. Entries without a string `path`
// are skipped.
auto fetch_tree_paths(const std::string& url) -> util::Result<std::vector<std::string>> {
    // curl's own --max-time (20s) is tighter than the process timeout (30s).
    auto fetched = exec::run("curl", {"-fsSL", "--max-time", "20", url},
                             exec::ExecOptions{
                                 .cwd = {},
                                 .env_overrides = {},
                                 .timeout = std::chrono::seconds{30},
                                 .inherit_stdio = false,
                             });
    if (!fetched) {
        return std::unexpected(fetched.error());
    }
    if (fetched->exit_code != 0) {
        auto detail = std::format("curl failed (exit {}): {}",
                                  fetched->exit_code, fetched->stderr_text);
        return std::unexpected(network_error(std::move(detail)));
    }

    nlohmann::json doc;
    try {
        doc = nlohmann::json::parse(fetched->stdout_text);
    } catch (const nlohmann::json::parse_error& err) {
        auto detail = std::format("tree listing not valid JSON: {}", err.what());
        return std::unexpected(network_error(std::move(detail)));
    }

    // `find` yields end() both for a missing key and for a non-object value.
    const auto tree = doc.find("tree");
    if (tree == doc.end() || !tree->is_array()) {
        return std::unexpected(network_error("tree listing missing 'tree' array"));
    }

    std::vector<std::string> paths;
    for (const auto& node : *tree) {
        const auto field = node.find("path");
        if (field == node.end() || !field->is_string()) {
            continue;
        }
        paths.push_back(field->get<std::string>());
    }
    return paths;
}
// Returns the directory under which cargoxx stores its cached listings.
//
// Lookup order:
//   1. `$XDG_CACHE_HOME/cargoxx` when the variable holds an absolute path.
//      The XDG Base Directory spec requires these variables to be absolute
//      and tells implementations to ignore relative values; honouring a
//      relative one would also scatter caches relative to the current
//      working directory.
//   2. `$HOME/.cache/cargoxx` when HOME is set and non-empty.
//   3. `<system temp dir>/cargoxx` as the last resort.
auto cache_root() -> std::filesystem::path {
    if (const char* xdg = std::getenv("XDG_CACHE_HOME"); xdg != nullptr && *xdg != '\0') {
        if (std::filesystem::path base{xdg}; base.is_absolute()) {
            return base / "cargoxx";
        }
    }
    if (const char* home = std::getenv("HOME"); home != nullptr && *home != '\0') {
        return std::filesystem::path{home} / ".cache" / "cargoxx";
    }
    return std::filesystem::temp_directory_path() / "cargoxx";
}
// How long a cached index listing is served before it is fetched again.
constexpr auto INDEX_TTL = std::chrono::hours{24};

// Returns the path listing for `url`, using the file
// `<cache_root()>/<cache_key>-index.txt` as a cache.
//
// A cache file younger than INDEX_TTL that yields at least one non-empty
// line is returned as-is (one entry per line). Otherwise the listing is
// fetched with `fetch_tree_paths(url)`, written back to the cache on a
// best-effort basis, and returned. Only a failed fetch is reported as an
// error; cache read/write problems simply fall through to the network.
auto load_or_fetch(const std::string& cache_key, const std::string& url)
    -> util::Result<std::vector<std::string>> {
    const auto path = cache_root() / std::format("{}-index.txt", cache_key);
    std::error_code ec;

    // Non-throwing overload on purpose: a missing file, or one that vanishes
    // or becomes unreadable between checks, must count as a cache miss
    // rather than throw filesystem_error out of a Result-returning function.
    const auto mtime = fs::last_write_time(path, ec);
    if (!ec) {
        const auto age = std::chrono::system_clock::now() -
                         std::chrono::file_clock::to_sys(mtime);
        if (age < INDEX_TTL) {
            if (std::ifstream in{path}; in) {
                std::vector<std::string> cached;
                std::string line;
                while (std::getline(in, line)) {
                    if (!line.empty()) {
                        cached.push_back(std::move(line));
                    }
                }
                // An empty cache file is treated as a miss and re-fetched.
                if (!cached.empty()) {
                    return cached;
                }
            }
        }
    }

    auto fresh = fetch_tree_paths(url);
    if (!fresh) {
        return std::unexpected(fresh.error());
    }

    // Best-effort write-back: failing to create the directory or open the
    // file leaves the cache cold but does not fail the lookup.
    fs::create_directories(path.parent_path(), ec);
    if (std::ofstream out{path}; out) {
        for (const auto& entry : *fresh) {
            out << entry << '\n';
        }
    }
    return fresh;
}
// Returns up to `k` names from `corpus` that are closest to `query` by
// Levenshtein distance, nearest first; ties are broken lexicographically.
// Candidates farther than ⌈len(query)/4⌉ edits (minimum 1) are discarded.
auto top_fuzzy(std::string_view query, const std::vector<std::string>& corpus,
               std::size_t k) -> std::vector<std::string> {
    const std::size_t cap = std::max<std::size_t>(1, (query.size() + 3) / 4);

    // Views into `corpus` (which outlives this call) so that only the
    // final top-k names are ever copied.
    struct Scored {
        std::size_t dist;
        std::string_view name;
    };
    std::vector<Scored> scored;
    for (const auto& candidate : corpus) {
        // Edit distance is never smaller than the length difference, so a
        // candidate whose length is off by more than `cap` cannot pass the
        // gate; skip the DP for it entirely.
        const std::size_t gap = candidate.size() > query.size()
                                    ? candidate.size() - query.size()
                                    : query.size() - candidate.size();
        if (gap > cap) {
            continue;
        }
        if (const auto dist = util::levenshtein(query, candidate); dist <= cap) {
            scored.push_back(Scored{dist, candidate});
        }
    }

    // Only the best `keep` entries need to be in order.
    const std::size_t keep = std::min(k, scored.size());
    std::ranges::partial_sort(
        scored, scored.begin() + static_cast<std::ptrdiff_t>(keep),
        [](const Scored& a, const Scored& b) {
            if (a.dist != b.dist) {
                return a.dist < b.dist;
            }
            return a.name < b.name;
        });

    std::vector<std::string> out;
    out.reserve(keep);
    for (std::size_t i = 0; i < keep; ++i) {
        out.emplace_back(scored[i].name);
    }
    return out;
}

// Number of fuzzy candidates probed after an exact-name miss.
constexpr auto FUZZY_K = std::size_t{3};
} // namespace
// Resolves a Conan recipe for `name`, tolerating near-miss spellings.
//
// Tries `conan_probe(name)` first. If that fails, loads the
// conan-center-index recipe listing (via the on-disk cache), takes the
// FUZZY_K closest names by edit distance and probes them in order,
// returning the first probe that succeeds.
//
// Errors: ResolutionUnknownPackage when the listing cannot be loaded (the
// underlying cause is appended to the message so it is not lost) or when
// no candidate probes successfully.
auto conan_probe_fuzzy(const std::string& name) -> util::Result<ConanRecipe> {
    if (auto exact = conan_probe(name); exact) {
        return exact;
    }
    auto index = load_or_fetch(
        "conan",
        "https://api.github.com/repos/conan-io/conan-center-index/git/trees/master:recipes");
    if (!index) {
        return std::unexpected(util::Error{
            util::ErrorCode::ResolutionUnknownPackage,
            std::format("no Conan recipe for '{}' and index fetch failed: {}",
                        name, util::format(index.error())),
            "", std::nullopt, std::nullopt,
        });
    }
    for (const auto& cand : top_fuzzy(name, *index, FUZZY_K)) {
        // The listing may contain `name` itself (distance 0); the exact
        // probe above already failed for it, so don't pay for it twice.
        if (cand == name) {
            continue;
        }
        if (auto r = conan_probe(cand); r) {
            return r;
        }
    }
    return std::unexpected(util::Error{
        util::ErrorCode::ResolutionUnknownPackage,
        std::format("no Conan recipe matches '{}' (tried exact + fuzzy top-{})",
                    name, FUZZY_K),
        "", std::nullopt, std::nullopt,
    });
}
// Resolves a vcpkg port for `name`, tolerating near-miss spellings.
//
// Tries `vcpkg_probe(name)` first. If that fails, loads the
// microsoft/vcpkg `ports` listing (via the on-disk cache), takes the
// FUZZY_K closest names by edit distance and probes them in order,
// returning the first probe that succeeds.
//
// Errors: ResolutionUnknownPackage when the listing cannot be loaded (the
// underlying cause is appended to the message so it is not lost) or when
// no candidate probes successfully.
auto vcpkg_probe_fuzzy(const std::string& name) -> util::Result<VcpkgRecipe> {
    if (auto exact = vcpkg_probe(name); exact) {
        return exact;
    }
    auto index = load_or_fetch(
        "vcpkg",
        "https://api.github.com/repos/microsoft/vcpkg/git/trees/master:ports");
    if (!index) {
        return std::unexpected(util::Error{
            util::ErrorCode::ResolutionUnknownPackage,
            std::format("no vcpkg port for '{}' and index fetch failed: {}",
                        name, util::format(index.error())),
            "", std::nullopt, std::nullopt,
        });
    }
    for (const auto& cand : top_fuzzy(name, *index, FUZZY_K)) {
        // The listing may contain `name` itself (distance 0); the exact
        // probe above already failed for it, so don't pay for it twice.
        if (cand == name) {
            continue;
        }
        if (auto r = vcpkg_probe(cand); r) {
            return r;
        }
    }
    return std::unexpected(util::Error{
        util::ErrorCode::ResolutionUnknownPackage,
        std::format("no vcpkg port matches '{}' (tried exact + fuzzy top-{})",
                    name, FUZZY_K),
        "", std::nullopt, std::nullopt,
    });
}
} // namespace cargoxx::resolver

View File

@@ -130,6 +130,13 @@ auto parse_conanfile(std::string_view conanfile_text, const std::string& fallbac
// ResolutionNetworkError.
auto conan_probe(const std::string& name) -> util::Result<ConanRecipe>;
// Like `conan_probe`, but on exact-name miss falls back to a fuzzy
// match against the conan-center-index recipe listing using
// Levenshtein distance ≤ ⌈len/4⌉. Returns the first fuzzy candidate
// whose conanfile.py parses cleanly. Internal-only: the user's
// originally-typed package name is preserved by the caller.
auto conan_probe_fuzzy(const std::string& name) -> util::Result<ConanRecipe>;
// Output of a microsoft/vcpkg port usage-file scrape.
struct VcpkgRecipe {
std::string find_package; // e.g. "fmt CONFIG REQUIRED"
@@ -148,6 +155,10 @@ auto parse_vcpkg_usage(std::string_view usage_text)
// ResolutionUnknownPackage; transport errors → ResolutionNetworkError.
auto vcpkg_probe(const std::string& name) -> util::Result<VcpkgRecipe>;
// Like `vcpkg_probe`, but on exact-name miss falls back to fuzzy
// matching against the vcpkg/ports listing (Levenshtein ≤ ⌈len/4⌉).
auto vcpkg_probe_fuzzy(const std::string& name) -> util::Result<VcpkgRecipe>;
// Caller-supplied closure that runs `cargoxx build` (or any equivalent
// build) on a project rooted at the given path. Injected so the resolver
// stays decoupled from `cargoxx.cli`.

30
src/util/levenshtein.cpp Normal file
View File

@@ -0,0 +1,30 @@
module cargoxx.util;
import std;
namespace cargoxx::util {
// Levenshtein edit distance between `a` and `b`: the minimum number of
// single-character insertions, deletions and substitutions that turn one
// string into the other. Comparison is byte-wise.
auto levenshtein(std::string_view a, std::string_view b) -> std::size_t {
    // Lay the shorter string along the row so the scratch buffer stays small.
    const bool a_is_shorter = a.size() < b.size();
    const std::string_view longer = a_is_shorter ? b : a;
    const std::string_view shorter = a_is_shorter ? a : b;

    // row[j] holds the distance between the first `i` chars of `longer`
    // and the first `j` chars of `shorter`; it is updated in place.
    std::vector<std::size_t> row(shorter.size() + 1);
    std::iota(row.begin(), row.end(), std::size_t{0});

    for (std::size_t i = 0; i < longer.size(); ++i) {
        std::size_t diagonal = row[0];  // value from the previous row, column j
        row[0] = i + 1;
        for (std::size_t j = 0; j < shorter.size(); ++j) {
            const std::size_t above = row[j + 1];  // previous row, column j + 1
            const std::size_t substitution =
                diagonal + (longer[i] == shorter[j] ? std::size_t{0} : std::size_t{1});
            row[j + 1] = std::min({above + 1, row[j] + 1, substitution});
            diagonal = above;
        }
    }
    return row.back();
}
} // namespace cargoxx::util

View File

@@ -47,6 +47,11 @@ using Result = std::expected<T, Error>;
auto format(const Error& e) -> std::string;
// Classic Levenshtein edit distance. Used by the resolver's
// Conan/vcpkg fuzzy match when the user's nixpkgs name doesn't appear
// verbatim in those repositories' indexes (e.g. `sqlite` ↔ `sqlite3`).
auto levenshtein(std::string_view a, std::string_view b) -> std::size_t;
// Returns true if `version` (e.g. "10.2", "1.84.0") satisfies `range`.
//
// Supported range syntax: