From: Simon Glass <simon.glass@canonical.com> Add a way to do static preprocessor analysis using debug information from compiled code. This reads the DWARF tables to determin which lines produced code. Co-developed-by: Claude <noreply@anthropic.com> Signed-off-by: Simon Glass <simon.glass@canonical.com> --- tools/codman/dwarf.py | 200 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 tools/codman/dwarf.py diff --git a/tools/codman/dwarf.py b/tools/codman/dwarf.py new file mode 100644 index 00000000000..adceac9d20a --- /dev/null +++ b/tools/codman/dwarf.py @@ -0,0 +1,200 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright 2025 Canonical Ltd +# +"""DWARF debug info-based line-level analysis for source code. + +This module provides functionality to analyse which lines in source files +were compiled by extracting line information from DWARF debug data in +object files. +""" + +import multiprocessing +import os +import subprocess +from collections import defaultdict + +from u_boot_pylib import tout +from analyser import Analyser, FileResult + + +def worker(args): + """Extract line numbers from DWARF debug info in an object file. + + Uses readelf --debug-dump=decodedline to get the line table, then parses + section headers and line entries to determine which source lines were + compiled into the object. + + Args: + args (tuple): Tuple of (obj_path, build_dir, srcdir) + + Returns: + tuple: (source_lines_dict, error_msg) where source_lines_dict is a + mapping of source file paths to sets of line numbers, and + error_msg is None on success or an error string on failure + """ + obj_path, build_dir, srcdir = args + source_lines = defaultdict(set) + + # Get the directory of the .o file relative to build_dir + rel_to_build = os.path.relpath(obj_path, build_dir) + obj_dir = os.path.dirname(rel_to_build) + + # Use readelf to extract decoded line information + try: + result = subprocess.run( + ['readelf', '--debug-dump=decodedline', obj_path], + capture_output=True, text=True, check=False, + encoding='utf-8', errors='ignore') + if result.returncode != 0: + error_msg = (f'readelf failed on {obj_path} with return code ' + f'{result.returncode}\nstderr: {result.stderr}') + return (source_lines, error_msg) + + # Parse the output + # Format is: Section header with full path, then data lines + current_file = None + for line in result.stdout.splitlines(): + # Skip header lines and empty lines + if not line or line.startswith('Contents of') or \ + line.startswith('File name') or line.strip() == '' or \ + line.startswith(' '): + continue + + # Look for section headers with full path (e.g., '/path/to/file.c:') + if line.endswith(':'): + header_path = line.rstrip(':') + # Try to resolve the path + if os.path.isabs(header_path): + # Absolute path in DWARF + abs_path = os.path.realpath(header_path) + else: + # Relative path - try relative to srcdir and obj_dir + abs_path = os.path.realpath( + os.path.join(srcdir, obj_dir, header_path)) + if not os.path.exists(abs_path): + abs_path = os.path.realpath( + os.path.join(srcdir, header_path)) + + if os.path.exists(abs_path): + current_file = abs_path + continue + + # Parse data lines - use current_file from section header + if current_file: + parts = line.split() + if len(parts) >= 2: + try: + line_num = int(parts[1]) + # Skip special line numbers (like '-') + if line_num > 0: + source_lines[current_file].add(line_num) + except (ValueError, IndexError): + continue + except (OSError, subprocess.SubprocessError) as e: + error_msg = f'Failed to execute readelf on {obj_path}: {e}' + return (source_lines, error_msg) + + return (source_lines, None) + + +# pylint: disable=too-few-public-methods +class DwarfAnalyser(Analyser): + """Analyser that uses DWARF debug info to determine active lines. + + This analyser extracts line number information from DWARF debug data in + compiled object files to determine which source lines generated code. + """ + def __init__(self, build_dir, srcdir, used_sources, keep_temps=False): + """Initialise the DWARF analyser. + + Args: + build_dir (str): Build directory containing .o files + srcdir (str): Path to source root directory + used_sources (set): Set of source files that are compiled + keep_temps (bool): If True, keep temporary files for debugging + """ + super().__init__(srcdir, keep_temps) + self.build_dir = build_dir + self.used_sources = used_sources + + def extract_lines(self, jobs=None): + """Extract used line numbers from DWARF debug info in object files. + + Args: + jobs (int): Number of parallel jobs (None = use all CPUs) + + Returns: + dict: Mapping of source file paths to sets of line numbers that + generated code + """ + # Find all .o files + obj_files = self.find_object_files(self.build_dir) + + if not obj_files: + return defaultdict(set) + + # Prepare arguments for parallel processing + args_list = [(obj_path, self.build_dir, self.srcdir) + for obj_path in obj_files] + + # Process in parallel + num_jobs = jobs if jobs else multiprocessing.cpu_count() + with multiprocessing.Pool(num_jobs) as pool: + results = pool.map(worker, args_list) + + # Merge results from all workers and check for errors + source_lines = defaultdict(set) + errors = [] + for result_dict, error_msg in results: + if error_msg: + errors.append(error_msg) + else: + for source_file, lines in result_dict.items(): + source_lines[source_file].update(lines) + + # Report any errors + if errors: + for error in errors: + tout.error(error) + tout.fatal(f'readelf failed on {len(errors)} object file(s)') + + return source_lines + + def process(self, jobs=None): + """Perform line-level analysis using DWARF debug info. + + Args: + jobs (int): Number of parallel jobs (None = use all CPUs) + + Returns: + dict: Mapping of source file paths to FileResult named tuples + """ + tout.progress('Extracting DWARF line information...') + dwarf_line_map = self.extract_lines(jobs) + + file_results = {} + for source_file in self.used_sources: + abs_path = os.path.realpath(source_file) + used_lines = dwarf_line_map.get(abs_path, set()) + + # Count total lines in the file + total_lines = self.count_lines(abs_path) + + active_lines = len(used_lines) + inactive_lines = total_lines - active_lines + + # Create line status dict + line_status = {} + for i in range(1, total_lines + 1): + line_status[i] = 'active' if i in used_lines else 'inactive' + + file_results[abs_path] = FileResult( + total_lines=total_lines, + active_lines=active_lines, + inactive_lines=inactive_lines, + line_status=line_status + ) + + tout.info(f'Analysed {len(file_results)} files using DWARF debug info') + return file_results -- 2.43.0