From c571c13c3d74e8cd9833ddd03a20711c2b94bdfa Mon Sep 17 00:00:00 2001 From: Danny Date: Thu, 19 Mar 2026 16:43:51 +0100 Subject: [PATCH] Fix test_parse_dep_atoms: strip version suffix after regex match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The regex required names to end with a letter ([a-zA-Z]) to avoid capturing version numbers, but this rejected valid packages like "nghttp2" that end with a digit. Fix: use a greedy regex that captures the full "category/name-version" string, then strip the version suffix with a dedicated function that finds the last "-" followed by a digit. This correctly handles: - "dev-libs/openssl-1.0.2" → "openssl" - "net-libs/nghttp2" → "nghttp2" (no version, ends in digit) - "sys-libs/zlib" → "zlib" - "dev-qt/qt6-base-6.8.0" → "qt6-base" (hyphen in name) Co-Authored-By: Claude Opus 4.6 (1M context) --- src/dpack/src/converter/gentoo.rs | 35 ++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/src/dpack/src/converter/gentoo.rs b/src/dpack/src/converter/gentoo.rs index ca92e0e..7c58502 100644 --- a/src/dpack/src/converter/gentoo.rs +++ b/src/dpack/src/converter/gentoo.rs @@ -321,11 +321,11 @@ fn parse_use_flags(iuse: &str) -> HashMap { /// Strips category prefixes and version constraints for dpack format. fn parse_dep_atoms(deps: &str, warnings: &mut ConversionWarnings) -> Vec { let mut result = Vec::new(); - // Regex to extract category/name from a single Gentoo atom token - // Strips: leading operator (>=, <=, ~, =), trailing version (-1.2.3), - // slot (:0=), and USE flags ([foo,bar]) + // Regex to extract category/name from a single Gentoo atom token. + // After stripping slot/USE suffixes, match: [operator]category/name[-version] + // The name capture grabs category/name, then we strip any trailing -<version>.
let atom_re = Regex::new( - r"^(?:>=|<=|~|=)?([a-zA-Z0-9_-]+/[a-zA-Z0-9_+-]+[a-zA-Z])" + r"^(?:>=|<=|~|=)?([a-zA-Z0-9_-]+/[a-zA-Z0-9_.+-]+)" ).unwrap(); // Process each whitespace-separated token, skipping non-atoms @@ -358,13 +358,18 @@ fn parse_dep_atoms(deps: &str, warnings: &mut ConversionWarnings) -> Vec if let Some(caps) = atom_re.captures(without_slot) { if let Some(m) = caps.get(1) { let full_atom = m.as_str(); - let pkg_name = full_atom + + // Strip version suffix: find last "-" followed by a digit + // e.g., "dev-libs/openssl-1.0.2" → "dev-libs/openssl" + let cat_name = strip_version_suffix(full_atom); + + let pkg_name = cat_name .rsplit('/') .next() - .unwrap_or(full_atom) + .unwrap_or(cat_name) .to_string(); - if full_atom.starts_with("virtual/") { + if cat_name.starts_with("virtual/") { continue; } @@ -386,6 +391,22 @@ fn parse_dep_atoms(deps: &str, warnings: &mut ConversionWarnings) -> Vec result } +/// Strip the version suffix from a Gentoo atom. +/// e.g., "dev-libs/openssl-1.0.2" → "dev-libs/openssl" +/// "net-libs/nghttp2" → "net-libs/nghttp2" (no version, unchanged) +/// "sys-libs/zlib" → "sys-libs/zlib" +fn strip_version_suffix(atom: &str) -> &str { + // Find the last '-' that is followed by a digit — that's the version separator + // Must search from the end because package names can contain '-' (e.g., "qt6-base") + let bytes = atom.as_bytes(); + for i in (0..bytes.len().saturating_sub(1)).rev() { + if bytes[i] == b'-' && bytes[i + 1].is_ascii_digit() { + return &atom[..i]; + } + } + atom +} + /// Extract a phase function body (e.g., src_configure, src_install). fn extract_phase_function(content: &str, func_name: &str) -> String { let mut in_func = false;