[PATCH 0/2] pickman: Fix robustness issues with error handling and large prompts
From: Simon Glass <sjg@chromium.org> Fix two bugs in pickman: one where broad exception handling causes data loss in the database migration path, and another where embedding large CI log tails in the prompt exceeds the Linux argument-size limit. Simon Glass (2): pickman: Write pipeline-fix log tails to temp files pickman: Only treat missing table as fresh database tools/pickman/agent.py | 82 +++++++++++++++++------ tools/pickman/database.py | 14 +++- tools/pickman/ftest.py | 134 +++++++++++++++++++++++++++++++++++--- 3 files changed, 198 insertions(+), 32 deletions(-) -- 2.43.0 base-commit: e1c2d707a19641df66a1c60b66909cb99d2627d9 branch: pickh
From: Simon Glass <simon.glass@canonical.com> The Claude Agent SDK passes the prompt as a command-line argument to the claude subprocess. Linux enforces a 128 KB limit per argument (MAX_ARG_STRLEN), which is exceeded when multiple CI jobs fail and their log tails (up to 200 lines each) are embedded in the prompt. Write job log tails and large MR descriptions to temporary files under a pickman-logs-* directory, and reference the file paths in the prompt so the agent reads them with its Read tool. The temp directory is cleaned up after the agent finishes. When no tempdir is provided, the original inline behaviour is preserved. Signed-off-by: Simon Glass <simon.glass@canonical.com> --- tools/pickman/agent.py | 82 ++++++++++++++++++++++++--------- tools/pickman/ftest.py | 100 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 152 insertions(+), 30 deletions(-) diff --git a/tools/pickman/agent.py b/tools/pickman/agent.py index 023b65b5d36..7e482a7b8ee 100644 --- a/tools/pickman/agent.py +++ b/tools/pickman/agent.py @@ -7,7 +7,9 @@ import asyncio import os +import shutil import sys +import tempfile # Allow 'from pickman import xxx' to work via symlink our_path = os.path.dirname(os.path.realpath(__file__)) @@ -540,9 +542,15 @@ def handle_mr_comments(mr_iid, branch_name, comments, remote, target='master', # pylint: disable=too-many-arguments def build_pipeline_fix_prompt(mr_iid, branch_name, failed_jobs, remote, - target, mr_description, attempt): + target, mr_description, attempt, + tempdir=None): """Build prompt and task description for the pipeline fix agent + The CI job log tails and MR description are written to temporary files + instead of being embedded in the prompt, to avoid exceeding the Linux + MAX_ARG_STRLEN limit (128 KB) when the Claude Agent SDK passes the + prompt as a command-line argument. 
+ Args: mr_iid (int): Merge request IID branch_name (str): Source branch name @@ -551,6 +559,8 @@ def build_pipeline_fix_prompt(mr_iid, branch_name, failed_jobs, remote, target (str): Target branch mr_description (str): MR description with context attempt (int): Fix attempt number + tempdir (str): Directory for temporary log files; when None + the log tails are embedded in the prompt (legacy behaviour) Returns: tuple: (prompt, task_desc) where prompt is the full agent prompt and @@ -558,20 +568,42 @@ def build_pipeline_fix_prompt(mr_iid, branch_name, failed_jobs, remote, """ task_desc = f'fix {len(failed_jobs)} failed pipeline job(s) (attempt {attempt})' - # Format failed jobs + # Format failed jobs - write log tails to temp files when tempdir is + # provided, to keep the prompt small enough for execve() job_sections = [] for job in failed_jobs: - job_sections.append( - f'### Job: {job.name} (stage: {job.stage})\n' - f'URL: {job.web_url}\n' - f'Log tail:\n```\n{job.log_tail}\n```' - ) + if tempdir and job.log_tail: + safe_name = job.name.replace('/', '_').replace(':', '_') + log_path = os.path.join(tempdir, + f'job-{job.id}-{safe_name}.log') + with open(log_path, 'w', encoding='utf-8') as fout: + fout.write(job.log_tail) + job_sections.append( + f'### Job: {job.name} (stage: {job.stage})\n' + f'URL: {job.web_url}\n' + f'Log tail: read file {log_path}' + ) + else: + job_sections.append( + f'### Job: {job.name} (stage: {job.stage})\n' + f'URL: {job.web_url}\n' + f'Log tail:\n```\n{job.log_tail}\n```' + ) jobs_text = '\n\n'.join(job_sections) - # Include MR description for context + # Include MR description for context - write to file when tempdir + # is provided and the description is non-trivial context_section = '' if mr_description: - context_section = f''' + if tempdir and len(mr_description) > 1024: + desc_path = os.path.join(tempdir, 'mr-description.txt') + with open(desc_path, 'w', encoding='utf-8') as fout: + fout.write(mr_description) + context_section = 
f''' +Context from MR description: read file {desc_path} +''' + else: + context_section = f''' Context from MR description: {mr_description} @@ -656,6 +688,10 @@ async def run_pipeline_fix_agent(mr_iid, branch_name, failed_jobs, remote, attempt=1, repo_path=None): """Run the Claude agent to fix pipeline failures + Job log tails are written to temporary files so that the prompt + passed to the agent stays well below the Linux MAX_ARG_STRLEN + limit (128 KB). + Args: mr_iid (int): Merge request IID branch_name (str): Source branch name @@ -676,20 +712,24 @@ async def run_pipeline_fix_agent(mr_iid, branch_name, failed_jobs, remote, if repo_path is None: repo_path = os.getcwd() - prompt, task_desc = build_pipeline_fix_prompt( - mr_iid, branch_name, failed_jobs, remote, target, - mr_description, attempt) - - options = ClaudeAgentOptions( - allowed_tools=['Bash', 'Read', 'Grep', 'Edit', 'Write'], - cwd=repo_path, - max_buffer_size=MAX_BUFFER_SIZE, - ) + tempdir = tempfile.mkdtemp(prefix='pickman-logs-') + try: + prompt, task_desc = build_pipeline_fix_prompt( + mr_iid, branch_name, failed_jobs, remote, target, + mr_description, attempt, tempdir=tempdir) + + options = ClaudeAgentOptions( + allowed_tools=['Bash', 'Read', 'Grep', 'Edit', 'Write'], + cwd=repo_path, + max_buffer_size=MAX_BUFFER_SIZE, + ) - tout.info(f'Starting Claude agent to {task_desc}...') - tout.info('') + tout.info(f'Starting Claude agent to {task_desc}...') + tout.info('') - return await run_agent_collect(prompt, options) + return await run_agent_collect(prompt, options) + finally: + shutil.rmtree(tempdir, ignore_errors=True) def fix_pipeline(mr_iid, branch_name, failed_jobs, remote, target='master', diff --git a/tools/pickman/ftest.py b/tools/pickman/ftest.py index fba10d3c4d7..52c807cf8cc 100644 --- a/tools/pickman/ftest.py +++ b/tools/pickman/ftest.py @@ -6005,6 +6005,16 @@ class TestGetFailedJobs(unittest.TestCase): class TestBuildPipelineFixPrompt(unittest.TestCase): """Tests for 
build_pipeline_fix_prompt function.""" + def setUp(self): + """Set up temp directory for log files.""" + self.tmp_dir = tempfile.mkdtemp(prefix='pickman-test-') + + def tearDown(self): + """Remove temp directory.""" + import shutil + + shutil.rmtree(self.tmp_dir, ignore_errors=True) + def test_single_job(self): """Test prompt with a single failed job""" failed_jobs = [ @@ -6015,16 +6025,22 @@ class TestBuildPipelineFixPrompt(unittest.TestCase): ] prompt, task_desc = agent.build_pipeline_fix_prompt( 42, 'cherry-abc123', failed_jobs, 'ci', 'master', - 'Test MR desc', 1) + 'Test MR desc', 1, tempdir=self.tmp_dir) self.assertIn('!42', prompt) self.assertIn('cherry-abc123', prompt) self.assertIn('build:sandbox', prompt) - self.assertIn('error: undefined reference', prompt) self.assertIn('attempt 1', prompt) self.assertIn('cherry-abc123-fix1', prompt) self.assertIn('1 failed', task_desc) + # Log tail should be in the temp file, not in the prompt + log_path = os.path.join(self.tmp_dir, 'job-1-build_sandbox.log') + self.assertTrue(os.path.exists(log_path)) + with open(log_path, encoding='utf-8') as inf: + self.assertEqual(inf.read(), 'error: undefined reference') + self.assertIn(log_path, prompt) + def test_multiple_jobs(self): """Test prompt with multiple failed jobs""" failed_jobs = [ @@ -6038,14 +6054,19 @@ class TestBuildPipelineFixPrompt(unittest.TestCase): log_tail='test failure'), ] prompt, task_desc = agent.build_pipeline_fix_prompt( - 42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1) + 42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1, + tempdir=self.tmp_dir) self.assertIn('build:sandbox', prompt) self.assertIn('test:dm', prompt) - self.assertIn('build error', prompt) - self.assertIn('test failure', prompt) self.assertIn('2 failed', task_desc) + # Both log files should exist + self.assertTrue(os.path.exists( + os.path.join(self.tmp_dir, 'job-1-build_sandbox.log'))) + self.assertTrue(os.path.exists( + os.path.join(self.tmp_dir, 'job-2-test_dm.log'))) + 
def test_attempt_number(self): """Test that attempt number is reflected in prompt""" failed_jobs = [ @@ -6055,7 +6076,8 @@ class TestBuildPipelineFixPrompt(unittest.TestCase): log_tail='error'), ] prompt, task_desc = agent.build_pipeline_fix_prompt( - 42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 3) + 42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 3, + tempdir=self.tmp_dir) self.assertIn('attempt 3', prompt) self.assertIn('cherry-abc123-fix3', prompt) @@ -6070,7 +6092,8 @@ class TestBuildPipelineFixPrompt(unittest.TestCase): log_tail='error'), ] prompt, _ = agent.build_pipeline_fix_prompt( - 42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1) + 42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1, + tempdir=self.tmp_dir) self.assertIn('um build sandbox', prompt) @@ -6087,7 +6110,8 @@ class TestBuildPipelineFixPrompt(unittest.TestCase): log_tail='error'), ] prompt, _ = agent.build_pipeline_fix_prompt( - 42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1) + 42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1, + tempdir=self.tmp_dir) # Should include both boards plus sandbox in the buildman command self.assertIn('buildman', prompt) @@ -6104,11 +6128,69 @@ class TestBuildPipelineFixPrompt(unittest.TestCase): log_tail='error'), ] prompt, _ = agent.build_pipeline_fix_prompt( - 42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1) + 42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1, + tempdir=self.tmp_dir) self.assertIn('buildman -o /tmp/pickman', prompt) self.assertIn('coral', prompt) + def test_large_logs_stay_under_limit(self): + """Test that large log tails are written to files, keeping the + prompt well under the Linux MAX_ARG_STRLEN limit (128 KB).""" + # Create 5 jobs with 200-line logs (~120 bytes per line) + big_log = '\n'.join(f'line {i}: ' + 'x' * 100 for i in range(200)) + failed_jobs = [ + gitlab.FailedJob( + id=i, name=f'build:board{i}', stage='build', + web_url=f'https://gitlab.com/job/{i}', + log_tail=big_log) + for 
i in range(5) + ] + prompt, _ = agent.build_pipeline_fix_prompt( + 42, 'cherry-abc123', failed_jobs, 'ci', 'master', + 'x' * 50000, 1, tempdir=self.tmp_dir) + + # Prompt should be well under 128 KB (the logs are in files) + self.assertLess(len(prompt), 128 * 1024) + + # All 5 log files plus mr-description should exist + log_files = [f for f in os.listdir(self.tmp_dir) + if f.startswith('job-')] + self.assertEqual(len(log_files), 5) + self.assertTrue(os.path.exists( + os.path.join(self.tmp_dir, 'mr-description.txt'))) + + def test_no_tempdir_embeds_inline(self): + """Test legacy behaviour when tempdir is None""" + failed_jobs = [ + gitlab.FailedJob( + id=1, name='build:sandbox', stage='build', + web_url='https://gitlab.com/job/1', + log_tail='error: undefined reference'), + ] + prompt, _ = agent.build_pipeline_fix_prompt( + 42, 'cherry-abc123', failed_jobs, 'ci', 'master', + 'Test MR desc', 1) + + self.assertIn('error: undefined reference', prompt) + self.assertIn('Test MR desc', prompt) + + def test_small_mr_desc_stays_inline(self): + """Test that a small MR description is kept inline""" + failed_jobs = [ + gitlab.FailedJob( + id=1, name='build:sandbox', stage='build', + web_url='https://gitlab.com/job/1', + log_tail='error'), + ] + prompt, _ = agent.build_pipeline_fix_prompt( + 42, 'cherry-abc123', failed_jobs, 'ci', 'master', + 'Short desc', 1, tempdir=self.tmp_dir) + + self.assertIn('Short desc', prompt) + self.assertFalse(os.path.exists( + os.path.join(self.tmp_dir, 'mr-description.txt'))) + class TestProcessPipelineFailures(unittest.TestCase): """Tests for process_pipeline_failures function.""" -- 2.43.0
From: Simon Glass <simon.glass@canonical.com> get_schema_version() catches OperationalError and returns 0 to indicate a fresh database. However, it catches all OperationalError variants, including "attempt to write a readonly database" and "database is locked". When this happens, migrate_to() misinterprets an existing database as empty and re-runs all migrations from scratch, destroying the data. Narrow the catch to only match "no such table", which is the genuine fresh-database case. All other OperationalError variants are re-raised so that callers see the real problem instead of silently losing data. Signed-off-by: Simon Glass <simon.glass@canonical.com> --- tools/pickman/database.py | 14 ++++++++++++-- tools/pickman/ftest.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/tools/pickman/database.py b/tools/pickman/database.py index 317668a979d..28b28896cb2 100644 --- a/tools/pickman/database.py +++ b/tools/pickman/database.py @@ -190,14 +190,24 @@ class Database: # pylint: disable=too-many-public-methods def get_schema_version(self): """Get the version of the database's schema + Only returns 0 when the schema_version table genuinely does not + exist (i.e. a fresh database). Other errors (read-only, locked, + corrupt) are re-raised so that migrate_to() does not accidentally + destroy an existing database by re-running all migrations. 
+ Return: int: Database version, 0 means there is no data + + Raises: + sqlite3.OperationalError: For errors other than a missing table """ try: self.cur.execute('SELECT version FROM schema_version') return self.cur.fetchone()[0] - except sqlite3.OperationalError: - return 0 + except sqlite3.OperationalError as exc: + if 'no such table' in str(exc): + return 0 + raise def execute(self, query, parameters=()): """Execute a database query diff --git a/tools/pickman/ftest.py b/tools/pickman/ftest.py index 52c807cf8cc..261ca4cd2d5 100644 --- a/tools/pickman/ftest.py +++ b/tools/pickman/ftest.py @@ -10,6 +10,7 @@ import asyncio import argparse import os import shutil +import sqlite3 import subprocess import sys import tempfile @@ -398,6 +399,39 @@ class TestDatabase(unittest.TestCase): self.assertEqual(sources[1], ('branch-b', 'def456')) dbs.close() + def test_schema_version_readonly_raises(self): + """Test that a read-only database raises instead of returning 0. + + Previously get_schema_version() caught all OperationalError and + returned 0, which caused migrate_to() to re-create all tables on + top of an existing (read-only) database, wiping the data. 
+ """ + with terminal.capture(): + dbs = database.Database(self.db_path) + dbs.start() + dbs.source_set('us/next', 'abc123') + dbs.commit() + + # Simulate a read-only error by replacing the cursor + real_cur = dbs.cur + mock_cur = mock.MagicMock() + mock_cur.execute.side_effect = sqlite3.OperationalError( + 'attempt to write a readonly database') + dbs.cur = mock_cur + with self.assertRaises(sqlite3.OperationalError): + dbs.get_schema_version() + dbs.cur = real_cur + dbs.close() + + def test_schema_version_missing_table_returns_zero(self): + """Test that a missing schema_version table returns 0.""" + with terminal.capture(): + dbs = database.Database(self.db_path) + dbs.open_it() + # Fresh database has no tables, should return 0 + self.assertEqual(dbs.get_schema_version(), 0) + dbs.close() + class TestDatabaseCommit(unittest.TestCase): """Tests for Database commit functions.""" -- 2.43.0
participants (1):
- Simon Glass