diff --git a/build/CMakeLists.txt b/build/CMakeLists.txt
index 30f5389..c20fd4d 100644
--- a/build/CMakeLists.txt
+++ b/build/CMakeLists.txt
@@ -60,6 +60,7 @@ target_sources(cargoxx
     ../src/resolver/conan_probe.cpp
     ../src/resolver/discover.cpp
     ../src/resolver/findmodule_scan.cpp
+    ../src/resolver/fuzzy_listing.cpp
     ../src/resolver/nix_cmake_scan.cpp
     ../src/resolver/nixpkgs_git.cpp
     ../src/resolver/nixpkgs_probe.cpp
@@ -69,6 +70,7 @@ target_sources(cargoxx
     ../src/resolver/verify_link.cpp
     ../src/resolver/version_resolve.cpp
     ../src/util/error.cpp
+    ../src/util/levenshtein.cpp
     ../src/util/semver.cpp
 )
 target_include_directories(cargoxx SYSTEM PRIVATE ../third_party)
diff --git a/src/resolver/discover.cpp b/src/resolver/discover.cpp
index fcd563b..d2020bb 100644
--- a/src/resolver/discover.cpp
+++ b/src/resolver/discover.cpp
@@ -124,10 +124,10 @@ auto discover(const std::string& name, const std::string& version_spec,
 
     std::vector candidates;
 
-    if (auto c = conan_probe(name); c) {
+    if (auto c = conan_probe_fuzzy(name); c) {
         candidates.push_back({"conan", recipe_from_conan(*c, name, "conan")});
     }
-    if (auto v = vcpkg_probe(name); v) {
+    if (auto v = vcpkg_probe_fuzzy(name); v) {
         candidates.push_back({"vcpkg", recipe_from_vcpkg(*v, name, "vcpkg")});
     }
     // Multi-output nix packages keep CMake configs in the `dev` output.
diff --git a/src/resolver/findmodule_scan.cpp b/src/resolver/findmodule_scan.cpp
index 842c87a..afcd2af 100644
--- a/src/resolver/findmodule_scan.cpp
+++ b/src/resolver/findmodule_scan.cpp
@@ -67,36 +67,54 @@ auto match_score(std::string_view stem, std::string_view pkg) -> int {
     return 2;
 }
 
-// Find CMake's bundled Modules dir. `cmake -E capabilities` emits JSON
-// with a `cmakeRoot` field; modules live at `${cmakeRoot}/Modules/`.
-// We parse the value with a tiny string search rather than dragging
-// nlohmann::json through this module — the field's value is always a
-// quoted string immediately after the literal `"cmakeRoot":`.
+// Find CMake's bundled Modules dir by running a one-line script that
+// prints `CMAKE_ROOT`. We can't use `cmake -E capabilities` because
+// CMake 4.x dropped the `cmakeRoot` field; `cmake -P` of a script with
+// `message("${CMAKE_ROOT}")` is the portable path. The message text
+// goes to stderr in `cmake -P` mode.
+//
+// Returns std::nullopt if the temp script cannot be written, cmake
+// cannot be run or exits non-zero, or `<root>/Modules` does not exist.
 auto find_modules_dir() -> std::optional {
-    auto r = exec::run("cmake",
-                       {"-E", "capabilities"},
+    // Non-throwing overload: an unusable temp dir must degrade to "no
+    // modules dir", not an exception out of the resolver.
+    std::error_code ec;
+    const auto tmp_dir = fs::temp_directory_path(ec);
+    if (ec) {
+        return std::nullopt;
+    }
+    auto script = tmp_dir / std::format("cargoxx-findroot-{}.cmake",
+                                        std::random_device{}());
+    {
+        std::ofstream out{script};
+        if (!out) {
+            return std::nullopt;
+        }
+        out << "message(\"${CMAKE_ROOT}\")\n";
+    }
+    auto r = exec::run("cmake", {"-P", script.string()},
                        exec::ExecOptions{
                            .cwd = fs::current_path(),
                            .env_overrides = {},
                            .timeout = std::chrono::seconds{5},
                            .inherit_stdio = false,
                        });
+    fs::remove(script, ec);  // best-effort cleanup; a leftover script is harmless
     if (!r || r->exit_code != 0) {
         return std::nullopt;
     }
-    std::string_view body = r->stdout_text;
-    constexpr std::string_view key = "\"cmakeRoot\":\"";
-    auto pos = body.find(key);
-    if (pos == std::string_view::npos) {
+    // The message goes to stderr in script mode. Take the last non-blank
+    // line so anything else cmake printed before it is ignored.
+    std::string_view body = r->stderr_text;
+    const auto last = body.find_last_not_of(" \t\r\n");
+    if (last == std::string_view::npos) {
         return std::nullopt;
     }
-    pos += key.size();
-    auto end = body.find('"', pos);
-    if (end == std::string_view::npos) {
-        return std::nullopt;
-    }
-    fs::path modules = fs::path{std::string{body.substr(pos, end - pos)}} / "Modules";
-    std::error_code ec;
+    body = body.substr(0, last + 1);
+    if (const auto nl = body.rfind('\n'); nl != std::string_view::npos) {
+        body.remove_prefix(nl + 1);
+    }
+    fs::path modules = fs::path{std::string{body}} / "Modules";
     if (!fs::exists(modules, ec) || ec) {
         return std::nullopt;
     }
diff --git a/src/resolver/fuzzy_listing.cpp b/src/resolver/fuzzy_listing.cpp
new file mode 100644
index 0000000..daddcc7
--- /dev/null
+++ b/src/resolver/fuzzy_listing.cpp
@@ -0,0 +1,238 @@
+module;
+
+#include <nlohmann/json.hpp>
+
+module cargoxx.resolver;
+
+import std;
+import cargoxx.exec;
+import cargoxx.util;
+
+namespace cargoxx::resolver {
+
+namespace fs = std::filesystem;
+
+namespace {
+
+// Wraps `msg` in a ResolutionNetworkError; the remaining Error fields are
+// left empty.
+auto network_error(std::string msg) -> util::Error {
+    return util::Error{util::ErrorCode::ResolutionNetworkError, std::move(msg),
+                       "", std::nullopt, std::nullopt};
+}
+
+// Downloads a GitHub git-tree listing with curl and returns the `path` of
+// every entry in its top-level `tree` array. A failure to launch curl is
+// passed through; a non-zero exit, unparseable JSON, or a missing `tree`
+// array becomes a ResolutionNetworkError.
+auto fetch_tree_paths(const std::string& url)
+    -> util::Result<std::vector<std::string>> {
+    auto r = exec::run("curl", {"-fsSL", "--max-time", "20", url},
+                       exec::ExecOptions{
+                           .cwd = {},
+                           .env_overrides = {},
+                           .timeout = std::chrono::seconds{30},
+                           .inherit_stdio = false,
+                       });
+    if (!r) {
+        return std::unexpected(r.error());
+    }
+    if (r->exit_code != 0) {
+        return std::unexpected(network_error(std::format(
+            "curl failed (exit {}): {}", r->exit_code, r->stderr_text)));
+    }
+    nlohmann::json j;
+    try {
+        j = nlohmann::json::parse(r->stdout_text);
+    } catch (const nlohmann::json::parse_error& e) {
+        return std::unexpected(
+            network_error(std::format("tree listing not valid JSON: {}", e.what())));
+    }
+    if (!j.contains("tree") || !j["tree"].is_array()) {
+        return std::unexpected(network_error("tree listing missing 'tree' array"));
+    }
+    std::vector<std::string> out;
+    for (const auto& entry : j["tree"]) {
+        if (entry.contains("path") && entry["path"].is_string()) {
+            out.push_back(entry["path"].get<std::string>());
+        }
+    }
+    return out;
+}
+
+// Cache directory: $XDG_CACHE_HOME/cargoxx, else $HOME/.cache/cargoxx,
+// else <system temp dir>/cargoxx.
+auto cache_root() -> fs::path {
+    if (auto* xdg = std::getenv("XDG_CACHE_HOME"); xdg && *xdg) {
+        return fs::path{xdg} / "cargoxx";
+    }
+    if (auto* home = std::getenv("HOME"); home && *home) {
+        return fs::path{home} / ".cache" / "cargoxx";
+    }
+    return fs::temp_directory_path() / "cargoxx";
+}
+
+constexpr auto INDEX_TTL = std::chrono::hours{24};
+
+// Reads a cached index: one name per line, blank lines skipped. A missing
+// or unreadable file yields an empty vector.
+auto read_index(const fs::path& path) -> std::vector<std::string> {
+    std::vector<std::string> names;
+    std::ifstream in{path};
+    for (std::string line; std::getline(in, line);) {
+        if (!line.empty()) {
+            names.push_back(std::move(line));
+        }
+    }
+    return names;
+}
+
+// Returns the name index for `cache_key`: the on-disk copy while it is
+// younger than INDEX_TTL, otherwise a fresh download from `url` (which is
+// then cached). If the download fails, a stale on-disk copy is returned
+// in preference to the error.
+auto load_or_fetch(const std::string& cache_key, const std::string& url)
+    -> util::Result<std::vector<std::string>> {
+    const auto path = cache_root() / std::format("{}-index.txt", cache_key);
+    std::error_code ec;
+
+    // Non-throwing overload: a cache file that vanished or cannot be
+    // stat'ed must mean "refetch", not an exception out of the resolver.
+    std::vector<std::string> cached;
+    if (const auto mtime = fs::last_write_time(path, ec); !ec) {
+        cached = read_index(path);
+        // Measure age in the file clock's own domain, so no to_sys()
+        // conversion is needed. A timestamp in the future counts as stale
+        // instead of as fresh.
+        const auto age = fs::file_time_type::clock::now() - mtime;
+        if (!cached.empty() && age.count() >= 0 && age < INDEX_TTL) {
+            return cached;
+        }
+    }
+
+    auto fresh = fetch_tree_paths(url);
+    if (!fresh) {
+        if (!cached.empty()) {
+            return cached;  // offline or rate-limited: stale beats nothing
+        }
+        return std::unexpected(fresh.error());
+    }
+
+    // Write a sibling temp file and rename it into place so a concurrent
+    // run never reads a half-written index. Caching is best-effort: any
+    // failure here is ignored and the fresh listing is still returned.
+    fs::create_directories(path.parent_path(), ec);
+    auto tmp = path;
+    tmp += std::format(".{}.tmp", std::random_device{}());
+    bool written = false;
+    if (std::ofstream out{tmp}; out) {
+        for (const auto& p : *fresh) {
+            out << p << '\n';
+        }
+        out.flush();
+        written = out.good();
+    }
+    if (written) {
+        fs::rename(tmp, path, ec);
+    }
+    if (!written || ec) {
+        fs::remove(tmp, ec);
+    }
+    return fresh;
+}
+
+// Levenshtein top-k filter with a max-distance gate of ⌈len/4⌉ (min 1).
+// Names identical to `query` are skipped: both callers only get here after
+// probing the exact name. Results are ordered by distance, then by name.
+auto top_fuzzy(std::string_view query, const std::vector<std::string>& corpus,
+               std::size_t k) -> std::vector<std::string> {
+    const std::size_t cap = std::max<std::size_t>(1, (query.size() + 3) / 4);
+    struct Scored {
+        std::size_t dist;
+        std::string_view name;  // view into `corpus`; only winners are copied
+    };
+    std::vector<Scored> scored;
+    for (const auto& c : corpus) {
+        // The length difference is a lower bound on the edit distance, so
+        // hopeless candidates are rejected without running the DP.
+        const auto gap = c.size() > query.size() ? c.size() - query.size()
+                                                 : query.size() - c.size();
+        if (gap > cap || c == query) {
+            continue;
+        }
+        if (const auto d = util::levenshtein(query, c); d <= cap) {
+            scored.push_back({d, c});
+        }
+    }
+    const auto keep = std::min(k, scored.size());
+    std::ranges::partial_sort(
+        scored, scored.begin() + static_cast<std::ptrdiff_t>(keep),
+        [](const Scored& a, const Scored& b) {
+            return std::tie(a.dist, a.name) < std::tie(b.dist, b.name);
+        });
+    std::vector<std::string> out;
+    out.reserve(keep);
+    for (std::size_t i = 0; i < keep; ++i) {
+        out.emplace_back(scored[i].name);
+    }
+    return out;
+}
+
+constexpr auto FUZZY_K = std::size_t{3};
+
+// Exact-then-fuzzy driver shared by both ecosystems. `probe` is the
+// exact-name probe, `what` names the thing being looked for in error
+// messages, and `cache_key`/`url` identify the name index to consult
+// once the exact probe has failed.
+template <typename Probe>
+auto probe_with_fuzzy_fallback(const std::string& name, std::string_view what,
+                               const std::string& cache_key,
+                               const std::string& url, Probe probe)
+    -> std::invoke_result_t<Probe&, const std::string&> {
+    if (auto exact = probe(name); exact) {
+        return exact;
+    }
+    // NOTE(review): a transport failure of the exact probe is treated like
+    // a miss here, as before; consider propagating it (confirm with callers).
+    auto index = load_or_fetch(cache_key, url);
+    if (!index) {
+        // Keep the underlying cause in the message instead of dropping it.
+        return std::unexpected(util::Error{
+            util::ErrorCode::ResolutionUnknownPackage,
+            std::format("no {} for '{}' and index fetch failed: {}", what, name,
+                        util::format(index.error())),
+            "", std::nullopt, std::nullopt,
+        });
+    }
+    for (const auto& cand : top_fuzzy(name, *index, FUZZY_K)) {
+        if (auto r = probe(cand); r) {
+            return r;
+        }
+    }
+    return std::unexpected(util::Error{
+        util::ErrorCode::ResolutionUnknownPackage,
+        std::format("no {} matches '{}' (tried exact + fuzzy top-{})", what, name,
+                    FUZZY_K),
+        "", std::nullopt, std::nullopt,
+    });
+}
+
+} // namespace
+
+// The return type is spelled via decltype so it is, by construction, the
+// same Result type the exact probe returns.
+auto conan_probe_fuzzy(const std::string& name) -> decltype(conan_probe(name)) {
+    return probe_with_fuzzy_fallback(
+        name, "Conan recipe", "conan",
+        "https://api.github.com/repos/conan-io/conan-center-index/git/trees/master:recipes",
+        [](const std::string& n) { return conan_probe(n); });
+}
+
+auto vcpkg_probe_fuzzy(const std::string& name) -> decltype(vcpkg_probe(name)) {
+    return probe_with_fuzzy_fallback(
+        name, "vcpkg port", "vcpkg",
+        "https://api.github.com/repos/microsoft/vcpkg/git/trees/master:ports",
+        [](const std::string& n) { return vcpkg_probe(n); });
+}
+
+} // namespace cargoxx::resolver
diff --git a/src/resolver/resolver.cppm b/src/resolver/resolver.cppm
index 12bc8ba..4e75b99 100644
--- a/src/resolver/resolver.cppm
+++ b/src/resolver/resolver.cppm
@@ -130,6 +130,13 @@ auto parse_conanfile(std::string_view conanfile_text, const std::string& fallbac
 // ResolutionNetworkError.
 auto conan_probe(const std::string& name) -> util::Result;
 
+// Like `conan_probe`, but on exact-name miss falls back to a fuzzy
+// match against the conan-center-index recipe listing using
+// Levenshtein distance ≤ ⌈len/4⌉. Returns the first fuzzy candidate
+// whose conanfile.py parses cleanly. Internal-only: the user's
+// originally-typed package name is preserved by the caller.
+auto conan_probe_fuzzy(const std::string& name) -> decltype(conan_probe(name));
+
 // Output of a microsoft/vcpkg port usage-file scrape.
 struct VcpkgRecipe {
     std::string find_package; // e.g. "fmt CONFIG REQUIRED"
@@ -148,6 +155,10 @@ auto parse_vcpkg_usage(std::string_view usage_text)
 // ResolutionUnknownPackage; transport errors → ResolutionNetworkError.
 auto vcpkg_probe(const std::string& name) -> util::Result;
 
+// Like `vcpkg_probe`, but on exact-name miss falls back to fuzzy
+// matching against the vcpkg/ports listing (Levenshtein ≤ ⌈len/4⌉).
+auto vcpkg_probe_fuzzy(const std::string& name) -> decltype(vcpkg_probe(name));
+
 // Caller-supplied closure that runs `cargoxx build` (or any equivalent
 // build) on a project rooted at the given path. Injected so the resolver
 // stays decoupled from `cargoxx.cli`.
diff --git a/src/util/levenshtein.cpp b/src/util/levenshtein.cpp
new file mode 100644
index 0000000..d67e352
--- /dev/null
+++ b/src/util/levenshtein.cpp
@@ -0,0 +1,36 @@
+module cargoxx.util;
+
+import std;
+
+namespace cargoxx::util {
+
+// Levenshtein edit distance between `a` and `b`, counted in single-char
+// insertions, deletions and substitutions (chars are bytes, not code points).
+// Two-row DP: O(|a|*|b|) time, O(min(|a|,|b|)) extra space.
+auto levenshtein(std::string_view a, std::string_view b) -> std::size_t {
+    // The distance is symmetric, so make `b` the shorter string; the rows
+    // below are sized by `b`.
+    if (a.size() < b.size()) {
+        std::swap(a, b);
+    }
+    std::vector<std::size_t> prev(b.size() + 1);
+    std::vector<std::size_t> curr(b.size() + 1);
+    // Row 0: turning the empty prefix of `a` into b[0..j) costs j inserts.
+    std::iota(prev.begin(), prev.end(), std::size_t{0});
+
+    for (std::size_t i = 1; i <= a.size(); ++i) {
+        curr[0] = i;  // a[0..i) -> empty string: i deletions
+        for (std::size_t j = 1; j <= b.size(); ++j) {
+            const auto cost = (a[i - 1] == b[j - 1]) ? std::size_t{0} : std::size_t{1};
+            curr[j] = std::min({
+                prev[j] + 1,         // delete a[i-1]
+                curr[j - 1] + 1,     // insert b[j-1]
+                prev[j - 1] + cost,  // match or substitute
+            });
+        }
+        std::swap(prev, curr);
+    }
+    return prev[b.size()];
+}
+
+} // namespace cargoxx::util
diff --git a/src/util/util.cppm b/src/util/util.cppm
index 2da2707..0313b49 100644
--- a/src/util/util.cppm
+++ b/src/util/util.cppm
@@ -47,6 +47,11 @@ using Result = std::expected;
 
 auto format(const Error& e) -> std::string;
 
+// Classic Levenshtein edit distance. Used by the resolver's
+// Conan/vcpkg fuzzy match when the user's nixpkgs name doesn't appear
+// verbatim in those repositories' indexes (e.g. `sqlite` ↔ `sqlite3`).
+auto levenshtein(std::string_view a, std::string_view b) -> std::size_t;
+
 // Returns true if `version` (e.g. "10.2", "1.84.0") satisfies `range`.
 //
 // Supported range syntax: