From: Simon Glass <simon.glass@canonical.com> Some toolchain tarballs from kernel.org contain symlinks with a doubled cross-compile prefix, e.g. 'x86_64-linux-x86_64-linux-gcc' alongside the correct 'x86_64-linux-gcc'. This causes buildman to print a warning about ambiguous toolchains when downloading. Add a regex-based check to detect and filter out these malformed binaries during toolchain scanning. When verbose output is enabled, these files are shown as "ignoring ... (doubled prefix)" rather than "found ...". Co-developed-by: Claude Opus 4.5 <noreply@anthropic.com> Signed-off-by: Simon Glass <simon.glass@canonical.com> --- tools/buildman/test.py | 24 ++++++++++++++++++++++++ tools/buildman/toolchain.py | 25 +++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/tools/buildman/test.py b/tools/buildman/test.py index da6df1f173c..0f4a5b9e543 100644 --- a/tools/buildman/test.py +++ b/tools/buildman/test.py @@ -744,6 +744,30 @@ class TestBuild(TestBuildBase): # Verify downloaded priority beats system priority self.assertLess(toolchain.PRIORITY_DOWNLOADED, sys_tc.priority) + def test_is_doubled_prefix(self): + """Test detection of doubled toolchain prefixes""" + # Valid toolchain names (not doubled) + self.assertFalse( + toolchain.Toolchains.is_doubled_prefix('aarch64-linux-gcc')) + self.assertFalse( + toolchain.Toolchains.is_doubled_prefix('x86_64-linux-gcc')) + self.assertFalse( + toolchain.Toolchains.is_doubled_prefix('arm-linux-gnueabi-gcc')) + self.assertFalse( + toolchain.Toolchains.is_doubled_prefix('gcc')) + + # Doubled prefixes (should be filtered out) + self.assertTrue( + toolchain.Toolchains.is_doubled_prefix( + 'aarch64-linux-aarch64-linux-gcc')) + self.assertTrue( + toolchain.Toolchains.is_doubled_prefix( + 'x86_64-linux-x86_64-linux-gcc')) + + # Not a gcc file + self.assertFalse( + toolchain.Toolchains.is_doubled_prefix('aarch64-linux-ld')) + def test_get_env_args(self): """Test the GetEnvArgs() function""" tc = 
self.toolchains.select('arm') diff --git a/tools/buildman/toolchain.py b/tools/buildman/toolchain.py index 8f3d3ab3b0c..27302f20d42 100644 --- a/tools/buildman/toolchain.py +++ b/tools/buildman/toolchain.py @@ -30,6 +30,10 @@ from u_boot_pylib import tools # Environment variable / argument types for get_env_args() (VAR_CROSS_COMPILE, VAR_PATH, VAR_ARCH, VAR_MAKE_ARGS) = range(4) +# Matches a repeated prefix, e.g. 'aarch64-linux-aarch64-linux-gcc' +RE_DOUBLED_PREFIX = re.compile(r'^(.+)\1gcc$') + + class MyHTMLParser(HTMLParser): """Simple class to collect links from a page @@ -378,6 +382,22 @@ class Toolchains: f"toolchain for arch '{toolchain.arch}' has priority " f"{self.toolchains[toolchain.arch].priority}") + @staticmethod + def is_doubled_prefix(fname): + """Check if a gcc filename has a doubled prefix + + Some toolchain tarballs contain symlinks with the cross-compile prefix + repeated, e.g. 'x86_64-linux-x86_64-linux-gcc'. These are not valid + toolchains and should be ignored. + + Args: + fname (str): Filename to check (basename, not full path) + + Returns: + bool: True if the prefix is doubled, False otherwise + """ + return bool(RE_DOUBLED_PREFIX.match(fname)) + def scan_path(self, path, verbose): """Scan a path for a valid toolchain @@ -394,6 +414,11 @@ class Toolchains: if verbose: print(f" - looking in '{dirname}'") for fname in glob.glob(dirname + '/*gcc'): + basename = os.path.basename(fname) + if self.is_doubled_prefix(basename): + if verbose: + print(f" - ignoring '{fname}' (doubled prefix)") + continue if verbose: print(f" - found '{fname}'") fnames.append(fname) -- 2.43.0