From: Simon Glass <simon.glass@canonical.com> When analysing code by functional area, a machine-readable format is needed for spreadsheet analysis and further processing. Add a --csv option to generate CSV reports with category and feature columns. The category system matches source files to functional areas defined in category.cfg using exact paths, glob patterns, and directory prefixes. Add a -F/--files-only flag for simplified output with just file rows, -u/--show-unmatched to list uncategorised files, and -E/--show-empty-features to list placeholder features. An [ignore] section in category.cfg allows excluding external code from reports. Co-developed-by: Claude Opus 4.5 <noreply@anthropic.com> Signed-off-by: Simon Glass <simon.glass@canonical.com> --- tools/codman/codman.py | 19 ++- tools/codman/codman.rst | 48 ++++++++ tools/codman/output.py | 209 ++++++++++++++++++++++++++++++++- tools/codman/test_category.py | 45 ++++++++ tools/codman/test_output.py | 212 ++++++++++++++++++++++++++++++++++ 5 files changed, 531 insertions(+), 2 deletions(-) create mode 100644 tools/codman/test_output.py diff --git a/tools/codman/codman.py b/tools/codman/codman.py index 893512456b2..7c6da823611 100755 --- a/tools/codman/codman.py +++ b/tools/codman/codman.py @@ -488,6 +488,14 @@ def parse_args(argv=None): help='Show line counts in kilolines (kLOC) instead of lines') dirs.add_argument('--html', type=str, metavar='FILE', help='Output results as HTML to the specified file') + dirs.add_argument('--csv', type=str, metavar='FILE', + help='Output results as CSV to the specified file') + dirs.add_argument('-u', '--show-unmatched', action='store_true', + help='List all files without a category match') + dirs.add_argument('-F', '--files-only', action='store_true', + help='Only output file rows in CSV (exclude directories)') + dirs.add_argument('-E', '--show-empty-features', action='store_true', + help='List features with no files defined') # detail command detail = subparsers.add_parser('detail', @@ -611,8 +619,9 @@ def do_output(args, all_srcs, used, skipped, results, srcdir, analysis_method): elif args.cmd == 'copy-used': ok = output.copy_used_files(used, srcdir, args.copy_used) elif args.cmd == 'dirs': - # Check if HTML output is requested + # Check if HTML or CSV output is requested html_file = getattr(args, 'html', None) + csv_file = getattr(args, 'csv', None) if html_file: ok = output.generate_html_breakdown(all_srcs, used, results, srcdir, args.subdirs, args.show_files, @@ -620,6 +629,14 @@ def do_output(args, all_srcs, used, skipped, results, srcdir, analysis_method): getattr(args, 'kloc', False), html_file, args.board, analysis_method) + elif csv_file: + ok = output.generate_csv( + all_srcs, used, results, srcdir, args.subdirs, + args.show_files, args.show_empty, + getattr(args, 'kloc', False), csv_file, + getattr(args, 'show_unmatched', False), + getattr(args, 'files_only', False), + getattr(args, 'show_empty_features', False)) else: ok = output.show_dir_breakdown(all_srcs, used, results, srcdir, args.subdirs, args.show_files, diff --git a/tools/codman/codman.rst b/tools/codman/codman.rst index c651fd6514e..a9f361c7c70 100644 --- a/tools/codman/codman.rst +++ b/tools/codman/codman.rst @@ -138,6 +138,10 @@ The ``dirs command`` has a few extra options: * ``-e, --show-empty`` - Show directories/files with 0 lines used * ``-k, --kloc`` - Show line counts in kilolines (kLOC) instead of raw lines * ``--html <file>`` - Generate an HTML report with collapsible drill-down +* ``--csv <file>`` - Generate a CSV 
report for spreadsheet analysis +* ``-F, --files-only`` - Only output file rows in CSV (exclude directories) +* ``-u, --show-unmatched`` - List files without a category match +* ``-E, --show-empty-features`` - List features with no files defined Other: @@ -312,6 +316,39 @@ The HTML report includes: This is useful for sharing reports or exploring large codebases interactively in a web browser. +CSV Reports (``dirs --csv``) +---------------------------- + +Generate a CSV report for spreadsheet analysis or further processing:: + + codman -b qemu-x86 dirs -sf --csv report.csv + +The CSV includes columns for Type, Path, Category, Feature, file counts, and +line statistics:: + + Type,Path,Category,Feature,Files,Used,%Used,%Code,Lines,Used + dir,arch/x86/cpu,,,20,15,75,85,3816,3227 + file,arch/x86/cpu/call32.S,load-boot,boot-x86-bare,,,,100,61,61 + file,arch/x86/cpu/cpu.c,load-boot,boot-x86-bare,,,,88,399,353 + ... + +Use ``-F`` (``--files-only``) for a simplified output with just file rows +(no directory summaries):: + + codman -b qemu-x86 dirs -sf --csv report.csv -F + +This produces cleaner output with columns: Path, Category, Feature, %Code, +Lines, Used:: + + Path,Category,Feature,%Code,Lines,Used + arch/x86/cpu/call32.S,load-boot,boot-x86-bare,100,61,61 + arch/x86/cpu/cpu.c,load-boot,boot-x86-bare,88,399,353 + arch/x86/cpu/cpu_x86.c,load-boot,boot-x86-bare,100,99,99 + ... + +CSV reports include category information from ``category.cfg``. Other output +formats (terminal, HTML) do not yet use categories. + Categories and Features ----------------------- @@ -343,6 +380,17 @@ Example category.cfg structure:: "boot/image-board.c", ] +When generating HTML reports, codman matches each source file to its feature +and category, making it easy to analyse code by functional area. + +Use ``-u`` (``--show-unmatched``) to list files that don't match any feature:: + + codman -b qemu-x86 dirs -sf -u + +Use ``-E`` (``--show-empty-features``) to list features with no files defined:: + + codman -b qemu-x86 dirs -sf -E + **Ignoring External Code** The ``[ignore]`` section in category.cfg can exclude external/vendored code diff --git a/tools/codman/output.py b/tools/codman/output.py index 67d8f98a649..9c855207435 100644 --- a/tools/codman/output.py +++ b/tools/codman/output.py @@ -14,6 +14,7 @@ formats: - File copying operations """ +import csv import os import shutil import sys @@ -23,6 +24,8 @@ from collections import defaultdict sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) from u_boot_pylib import terminal, tout # pylint: disable=wrong-import-position +import category + class DirStats: # pylint: disable=too-few-public-methods """Statistics for a directory. @@ -507,7 +510,7 @@ def generate_html_breakdown(all_sources, used_sources, file_results, srcdir, use_kloc (bool): If True, show line counts in kLOC html_file (str): Path to output HTML file board (str): Board name (optional) - analysis_method (str): Analysis method used ('unifdef', 'lsp', or 'dwarf') + analysis_method (str): Analysis method ('unifdef'/'lsp'/'dwarf') Returns: bool: True on success @@ -907,6 +910,210 @@ def generate_html_breakdown(all_sources, used_sources, file_results, srcdir, return False +def _write_file_row(writer, info, features, ignore_patterns, file_results, + use_kloc, files_only): + """Write a single file row to CSV. 
+ + Args: + writer: CSV writer object + info (dict): File info with 'path', 'total', 'active' keys + features (dict): Features dict from category config + ignore_patterns (list): List of patterns to ignore + file_results (dict): File analysis results (or None) + use_kloc (bool): If True, show line counts in kLOC + files_only (bool): If True, use simplified row format + + Returns: + tuple: (wrote_row, is_matched) - whether row was written, whether file + matched a category + """ + # Skip ignored files (external code) + if category.should_ignore_file(info['path'], ignore_patterns): + return False, True # Not written, but considered matched + + # Match file to feature/category + feat_id, cat_id = None, None + if features: + feat_id, cat_id = category.get_file_feature(info['path'], features) + + is_matched = feat_id is not None + + if file_results: + pct_active = percent(info['active'], info['total']) + + if use_kloc: + total_str = klocs(info['total']) + active_str = klocs(info['active']) + else: + total_str = info['total'] + active_str = info['active'] + + if files_only: + writer.writerow([info['path'], cat_id or '', feat_id or '', + f'{pct_active:.0f}', total_str, active_str]) + else: + writer.writerow(['file', info['path'], cat_id or '', feat_id or '', + '', '', '', f'{pct_active:.0f}', + total_str, active_str]) + + return True, is_matched + + +def _report_matching_stats(features, total_files, unmatched_files, + show_unmatched, show_empty_features): + """Report category matching statistics. + + Args: + features (dict): Features dict from category config + total_files (int): Total number of files processed + unmatched_files (list): List of file paths without category match + show_unmatched (bool): If True, list all unmatched files + show_empty_features (bool): If True, list features with no files + """ + if features and total_files > 0: + matched = total_files - len(unmatched_files) + print(f'Category matching: {matched}/{total_files} files matched, ' + f'{len(unmatched_files)} unmatched') + if show_unmatched and unmatched_files: + print('Unmatched files:') + for filepath in sorted(unmatched_files): + print(f' {filepath}') + + if features and show_empty_features: + empty_features = [ + feat_id for feat_id, feat_data in features.items() + if not feat_data.get('files', []) + ] + if empty_features: + print(f'Features with no files ({len(empty_features)}):') + for feat_id in sorted(empty_features): + print(f' {feat_id}') + + +def generate_csv(all_sources, used_sources, file_results, srcdir, + by_subdirs, show_files, show_empty, use_kloc, csv_file, + show_unmatched=False, files_only=False, + show_empty_features=False): + """Generate CSV output with directory breakdown. 
+ + Args: + all_sources (set): Set of all source file paths + used_sources (set): Set of used source file paths + file_results (dict): Optional dict mapping file paths to line analysis + results (or None) + srcdir (str): Root directory of the source tree + by_subdirs (bool): If True, show full subdirectory breakdown + show_files (bool): If True, show individual files within directories + show_empty (bool): If True, show directories with 0 lines used + use_kloc (bool): If True, show line counts in kLOC + csv_file (str): Path to output CSV file + show_unmatched (bool): If True, list all unmatched files to stdout + files_only (bool): If True, only output file rows (exclude directories) + show_empty_features (bool): If True, list features with no files defined + + Returns: + bool: True on success + """ + + # Load category configuration for file-to-feature matching + cfg = category.load_category_config(srcdir) + features = cfg.features if cfg else None + ignore_patterns = cfg.ignore if cfg else None + + # Collect directory statistics + dir_stats = collect_dir_stats(all_sources, used_sources, file_results, + srcdir, by_subdirs, show_files) + + # Calculate totals + total_lines_all = sum(count_lines(f) for f in all_sources) + if file_results: + total_lines_used = sum(r.active_lines for r in file_results.values()) + else: + total_lines_used = sum(count_lines(f) for f in used_sources) + + # Track unmatched files + unmatched_files = [] + total_files = 0 + + try: + with open(csv_file, 'w', newline='', encoding='utf-8') as f: + writer = csv.writer(f) + + # Write header + lines_header = 'kLOC' if use_kloc else 'Lines' + if files_only: + writer.writerow(['Path', 'Category', 'Feature', '%Code', + lines_header, 'Used']) + else: + writer.writerow(['Type', 'Path', 'Category', 'Feature', 'Files', + 'Used', '%Used', '%Code', lines_header, 'Used']) + + # Sort and output directories + for dir_path in sorted(dir_stats.keys()): + stats = dir_stats[dir_path] + + # Skip directories with 0 lines used unless show_empty is set + if not show_empty and stats.lines_used == 0: + continue + + pct_used = percent(stats.used, stats.total) + pct_code = percent(stats.lines_used, stats.lines_total) + + if use_kloc: + lines_total_str = klocs(stats.lines_total) + lines_used_str = klocs(stats.lines_used) + else: + lines_total_str = stats.lines_total + lines_used_str = stats.lines_used + + if not files_only: + writer.writerow([ + 'dir', dir_path, '', '', stats.total, stats.used, + f'{pct_used:.0f}', f'{pct_code:.0f}', + lines_total_str, lines_used_str]) + + # Output files if requested + if show_files and stats.files: + sorted_files = sorted( + stats.files, key=lambda x: os.path.basename(x['path'])) + + for info in sorted_files: + if not show_empty and info['active'] == 0: + continue + + wrote, matched = _write_file_row( + writer, info, features, ignore_patterns, + file_results, use_kloc, files_only) + if wrote: + total_files += 1 + if not matched: + unmatched_files.append(info['path']) + + # Write totals row + pct_files = percent(len(used_sources), len(all_sources)) + pct_code = percent(total_lines_used, total_lines_all) + + if use_kloc: + total_str = klocs(total_lines_all) + used_str = klocs(total_lines_used) + else: + total_str = total_lines_all + used_str = total_lines_used + + if not files_only: + writer.writerow(['total', 'TOTAL', '', '', len(all_sources), + len(used_sources), f'{pct_files:.0f}', + f'{pct_code:.0f}', total_str, used_str]) + + tout.info(f'CSV report written to: {csv_file}') + _report_matching_stats(features, 
total_files, unmatched_files, + show_unmatched, show_empty_features) + return True + except IOError as e: + tout.error(f'Failed to write CSV file: {e}') + return False + + def show_statistics(all_sources, used_sources, skipped_sources, file_results, srcdir, top_n): """Show overall statistics about source file usage. diff --git a/tools/codman/test_category.py b/tools/codman/test_category.py index 3ce89d70b18..475c69df75a 100644 --- a/tools/codman/test_category.py +++ b/tools/codman/test_category.py @@ -194,6 +194,51 @@ files = [] self.assertIn('test', result.categories) +class TestShouldIgnoreFile(unittest.TestCase): + """Test cases for should_ignore_file function""" + + def test_ignore_directory_prefix(self): + """Test ignoring files by directory prefix""" + ignore = ['lib/external/'] + self.assertTrue(category.should_ignore_file( + 'lib/external/foo.c', ignore)) + self.assertTrue(category.should_ignore_file( + 'lib/external/sub/bar.c', ignore)) + self.assertFalse(category.should_ignore_file( + 'lib/internal/foo.c', ignore)) + + def test_ignore_exact_path(self): + """Test ignoring files by exact path""" + ignore = ['lib/external/specific.c'] + self.assertTrue(category.should_ignore_file( + 'lib/external/specific.c', ignore)) + self.assertFalse(category.should_ignore_file( + 'lib/external/other.c', ignore)) + + def test_ignore_glob_pattern(self): + """Test ignoring files by glob pattern""" + ignore = ['lib/external/*.c'] + self.assertTrue(category.should_ignore_file( + 'lib/external/foo.c', ignore)) + self.assertFalse(category.should_ignore_file( + 'lib/external/foo.h', ignore)) + + def test_empty_ignore_list(self): + """Test with empty ignore list""" + self.assertFalse(category.should_ignore_file('any/file.c', [])) + self.assertFalse(category.should_ignore_file('any/file.c', None)) + + def test_multiple_ignore_patterns(self): + """Test with multiple ignore patterns""" + ignore = ['lib/external/', 'vendor/*.c'] + self.assertTrue(category.should_ignore_file( + 'lib/external/foo.c', ignore)) + self.assertTrue(category.should_ignore_file( + 'vendor/bar.c', ignore)) + self.assertFalse(category.should_ignore_file( + 'src/main.c', ignore)) + + class TestHelperFunctions(unittest.TestCase): """Test cases for helper functions""" diff --git a/tools/codman/test_output.py b/tools/codman/test_output.py new file mode 100644 index 00000000000..126ea95af57 --- /dev/null +++ b/tools/codman/test_output.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0+ +# +# Copyright 2025 Canonical Ltd +# +"""Unit tests for output.py CSV generation""" + +import csv +import os +import shutil +import sys +import tempfile +import unittest +from collections import namedtuple + +# Test configuration +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) + +# Import the module to test +sys.path.insert(0, SCRIPT_DIR) +sys.path.insert(0, os.path.join(SCRIPT_DIR, '..')) +import output # pylint: disable=wrong-import-position +from u_boot_pylib import tools # pylint: disable=wrong-import-position + + +# Mock FileResult for testing +FileResult = namedtuple('FileResult', + ['total_lines', 'active_lines', 'inactive_lines']) + + +class TestGenerateCsv(unittest.TestCase): + """Test cases for generate_csv function""" + + def setUp(self): + """Create temporary directory with test files""" + self.test_dir = tempfile.mkdtemp(prefix='test_output_') + + # Create source files + self.src_dir = os.path.join(self.test_dir, 'src') + os.makedirs(os.path.join(self.src_dir, 'boot')) + 
os.makedirs(os.path.join(self.src_dir, 'drivers', 'net')) + os.makedirs(os.path.join(self.src_dir, 'tools', 'codman')) + + # Create test source files with known content + self.files = { + 'boot/bootm.c': '// boot\n' * 100, + 'boot/image.c': '// image\n' * 50, + 'drivers/net/eth.c': '// eth\n' * 200, + } + for path, content in self.files.items(): + full_path = os.path.join(self.src_dir, path) + tools.write_file(full_path, content, binary=False) + + # Create category.cfg + cfg_content = ''' +[categories.load-boot] +description = "Loading & Boot" + +[categories.drivers] +description = "Drivers" + +[features.boot-core] +category = "load-boot" +description = "Core boot" +files = ["boot/"] + +[features.ethernet] +category = "drivers" +description = "Ethernet" +files = ["drivers/net/"] +''' + cfg_path = os.path.join(self.src_dir, 'tools', 'codman', 'category.cfg') + tools.write_file(cfg_path, cfg_content, binary=False) + + self.csv_file = os.path.join(self.test_dir, 'report.csv') + + def tearDown(self): + """Clean up temporary directory""" + if os.path.exists(self.test_dir): + shutil.rmtree(self.test_dir) + + def test_csv_basic(self): + """Test basic CSV generation""" + all_sources = { + os.path.join(self.src_dir, p) for p in self.files + } + used_sources = all_sources.copy() + + result = output.generate_csv( + all_sources, used_sources, None, self.src_dir, + by_subdirs=True, show_files=True, show_empty=False, + use_kloc=False, csv_file=self.csv_file) + + self.assertTrue(result) + self.assertTrue(os.path.exists(self.csv_file)) + + # Read and verify CSV content + data = tools.read_file(self.csv_file, binary=False) + rows = list(csv.reader(data.splitlines())) + + # Check header + self.assertEqual(rows[0][0], 'Type') + self.assertEqual(rows[0][1], 'Path') + self.assertEqual(rows[0][2], 'Category') + self.assertEqual(rows[0][3], 'Feature') + + def test_csv_files_only(self): + """Test CSV generation with files_only option""" + all_sources = { + os.path.join(self.src_dir, p) for p in self.files + } + used_sources = all_sources.copy() + + result = output.generate_csv( + all_sources, used_sources, None, self.src_dir, + by_subdirs=True, show_files=True, show_empty=False, + use_kloc=False, csv_file=self.csv_file, files_only=True) + + self.assertTrue(result) + + data = tools.read_file(self.csv_file, binary=False) + rows = list(csv.reader(data.splitlines())) + + # Check simplified header for files_only + self.assertEqual(rows[0][0], 'Path') + self.assertEqual(rows[0][1], 'Category') + self.assertEqual(rows[0][2], 'Feature') + self.assertEqual(rows[0][3], '%Code') + + # No 'dir' or 'total' rows + for row in rows[1:]: + self.assertNotIn(row[0], ['dir', 'total']) + + def test_csv_category_matching(self): + """Test that files are matched to correct categories""" + all_sources = { + os.path.join(self.src_dir, p) for p in self.files + } + used_sources = all_sources.copy() + + # Create mock file results + file_results = {} + for path, content in self.files.items(): + full_path = os.path.join(self.src_dir, path) + lines = len(content.split('\n')) + file_results[full_path] = FileResult(lines, lines, 0) + + result = output.generate_csv( + all_sources, used_sources, file_results, self.src_dir, + by_subdirs=True, show_files=True, show_empty=False, + use_kloc=False, csv_file=self.csv_file, files_only=True) + + self.assertTrue(result) + + data = tools.read_file(self.csv_file, binary=False) + rows = list(csv.reader(data.splitlines())) + + # Find boot files and verify category + boot_rows = [r for r in rows[1:] if 'boot/' 
in r[0]] + self.assertEqual(len(boot_rows), 2) # bootm.c and image.c + for row in boot_rows: + self.assertEqual(row[1], 'load-boot') + self.assertEqual(row[2], 'boot-core') + + # Find driver files and verify category + driver_rows = [r for r in rows[1:] if 'drivers/' in r[0]] + self.assertEqual(len(driver_rows), 1) # eth.c + for row in driver_rows: + self.assertEqual(row[1], 'drivers') + self.assertEqual(row[2], 'ethernet') + + def test_csv_with_ignore(self): + """Test CSV generation with ignored files""" + # Add ignore section to config + cfg_path = os.path.join(self.src_dir, 'tools', 'codman', 'category.cfg') + existing = tools.read_file(cfg_path, binary=False) + tools.write_file(cfg_path, + existing + '\n[ignore]\nfiles = ["drivers/net/"]\n', + binary=False) + + all_sources = { + os.path.join(self.src_dir, p) for p in self.files + } + used_sources = all_sources.copy() + + # Create mock file results + file_results = {} + for path, content in self.files.items(): + full_path = os.path.join(self.src_dir, path) + lines = len(content.split('\n')) + file_results[full_path] = FileResult(lines, lines, 0) + + result = output.generate_csv( + all_sources, used_sources, file_results, self.src_dir, + by_subdirs=True, show_files=True, show_empty=False, + use_kloc=False, csv_file=self.csv_file, files_only=True) + + self.assertTrue(result) + + data = tools.read_file(self.csv_file, binary=False) + rows = list(csv.reader(data.splitlines())) + + # Verify ignored files are not in output + paths = [r[0] for r in rows[1:]] + self.assertFalse(any('drivers/net/' in p for p in paths)) + + # Boot files should still be there + self.assertTrue(any('boot/' in p for p in paths)) + + +if __name__ == '__main__': + unittest.main() -- 2.43.0
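
As a small illustration of the "further processing" mentioned in the commit message, a files-only report (``codman -b qemu-x86 dirs -sf --csv report.csv -F``) can be summarised by category with Python's standard csv module. This is a minimal sketch, not the tool's own code; it assumes only the Path/Category/Feature/%Code/Lines/Used header documented above and the default raw line counts (no ``-k``)::

    import csv
    from collections import defaultdict

    # Sum used lines per category from a files-only CSV report
    totals = defaultdict(int)
    with open('report.csv', newline='', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            cat = row['Category'] or '(uncategorised)'
            totals[cat] += int(row['Used'])

    for cat, used in sorted(totals.items(), key=lambda item: -item[1]):
        print(f'{cat:20} {used:8}')

The same approach works on the Feature column for a per-feature breakdown.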