providers+policy: scope gemini cogitate yolo via policy file

+1 -1

pyproject.toml

··· 112 112 113 113 [tool.setuptools.package-data] 114 114 apps = ["*/templates/*.html", "*/talent/*.md"] 115 - think = ["*.md", "*.json", "templates/*.md"] 115 + think = ["*.md", "*.json", "templates/*.md", "policies/*.toml"] 116 116 talent = ["*.md", "*.py"] 117 117 observe = ["*.md", "categories/*.md", "transcribe/*.md"] 118 118 convey = [

+14 -3

tests/test_cogitate_coder.py

··· 147 147 return importlib.import_module("think.providers.google") 148 148 149 149 @patch("think.providers.google.CLIRunner") 150 - def test_no_write_uses_plan_mode(self, mock_runner_cls): 151 - """Without write flag, approval-mode is plan (read-only).""" 150 + def test_no_write_uses_yolo_with_policy(self, mock_runner_cls): 151 + """Without write flag, approval-mode is yolo with scoped policy.""" 152 152 provider = self._provider() 153 153 mock_instance = AsyncMock() 154 154 mock_instance.run = AsyncMock(return_value="result") ··· 160 160 161 161 cmd = mock_runner_cls.call_args.kwargs["cmd"] 162 162 idx = cmd.index("--approval-mode") 163 - assert cmd[idx + 1] == "plan" 163 + assert cmd[idx + 1] == "yolo" 164 + policy_idx = cmd.index("--policy") 165 + assert cmd[policy_idx + 1].endswith("policies/cogitate.toml") 164 166 165 167 @patch("think.providers.google.CLIRunner") 166 168 def test_write_true_uses_yolo_mode(self, mock_runner_cls): ··· 177 179 cmd = mock_runner_cls.call_args.kwargs["cmd"] 178 180 idx = cmd.index("--approval-mode") 179 181 assert cmd[idx + 1] == "yolo" 182 + assert "--policy" not in cmd 183 + 184 + def test_cogitate_policy_file_exists_on_disk(self): 185 + """The policy path wired into argv must resolve to a real file.""" 186 + from think.providers.google import _COGITATE_POLICY_PATH 187 + 188 + assert _COGITATE_POLICY_PATH.is_file(), ( 189 + f"Expected policy file at {_COGITATE_POLICY_PATH}" 190 + ) 180 191 181 192 182 193 # ---------------------------------------------------------------------------

+5 -2

tests/test_google_cli.py

··· 29 29 cmd = MockCLIRunner.last_instance.cmd 30 30 idx = cmd.index("--approval-mode") 31 31 assert cmd[idx + 1] == "yolo" 32 + assert "--policy" not in cmd 32 33 33 34 34 35 class TestTranslateGemini: ··· 329 330 330 331 return MockCLIRunner 331 332 332 - def test_yolo_mode_with_sol_allowed(self): 333 + def test_no_write_uses_yolo_with_policy(self): 333 334 provider = _google_provider() 334 335 MockCLIRunner = self._mock_runner() 335 336 with patch("think.providers.google.CLIRunner", MockCLIRunner): ··· 340 341 ) 341 342 cmd = MockCLIRunner.last_instance.cmd 342 343 idx = cmd.index("--approval-mode") 343 - assert cmd[idx + 1] == "plan" 344 + assert cmd[idx + 1] == "yolo" 345 + policy_idx = cmd.index("--policy") 346 + assert cmd[policy_idx + 1].endswith("policies/cogitate.toml") 344 347 345 348 def test_write_mode_uses_yolo_approval(self): 346 349 _assert_write_mode_uses_yolo_approval(self._mock_runner)

+32

think/policies/cogitate.toml

··· 1 + # Scoped-yolo policy for non-write cogitate talents. 2 + # 3 + # Posture: start from yolo's default-allow, then subtract the two behaviors 4 + # we don't want — direct filesystem writes, and arbitrary shell commands. 5 + # `sol` invocations (including pipelines like `echo ... | sol call ...`) 6 + # are the one shell surface cogitate talents legitimately need. 7 + # 8 + # Priority: highest matching priority wins regardless of allow/deny. The 9 + # allow rule for sol run_shell_command (priority 200) overrides the blanket 10 + # run_shell_command deny (priority 100). User-tier rules here override the 11 + # engine's built-in yolo catch-all. 12 + # 13 + # Rationale: vpe/workspace/gemini-cli-tool-hallucination-research.md — plan 14 + # mode strips run_shell_command from the tool registry, which caused the 15 + # tool-name hallucination loop we saw in cortex. Scoped yolo keeps the 16 + # registry intact without widening the blast radius to direct writes. 17 + 18 + [[rule]] 19 + toolName = ["write_file", "replace"] 20 + decision = "deny" 21 + priority = 200 22 + 23 + [[rule]] 24 + toolName = "run_shell_command" 25 + commandRegex = "(^sol\\s|\\bsol call\\b)" 26 + decision = "allow" 27 + priority = 200 28 + 29 + [[rule]] 30 + toolName = "run_shell_command" 31 + decision = "deny" 32 + priority = 100

+16 -9

think/providers/google.py

··· 65 65 # Backend detection cache 66 66 _detected_backend: str | None = None 67 67 68 + _COGITATE_POLICY_PATH = Path(__file__).parent.parent / "policies" / "cogitate.toml" 69 + 68 70 69 71 def _structured_to_google_contents( 70 72 messages: list[dict[str, str]], ··· 749 751 if system_instruction: 750 752 prompt_body = system_instruction + "\n\n" + prompt_body 751 753 752 - # Build CLI command. approval-mode controls tool access: 753 - # "yolo" — auto-approve all tools (write-enabled agents only) 754 - # "plan" — read-only mode (no file writes, no destructive tools) 755 - # The deprecated --allowed-tools flag did NOT restrict tool 756 - # availability, only auto-approval — combined with --yolo it 757 - # provided zero protection. --approval-mode plan is the 758 - # replacement that actually enforces read-only. 759 - approval = "yolo" if config.get("write") else "plan" 754 + # Approval posture: 755 + # - Write-enabled talents (coder) run unpolicied yolo: full tool registry, 756 + # write_file / replace allowed. 757 + # - Read-only cogitate talents run yolo + a scoped policy: full tool 758 + # registry (no plan-mode stripping), but write_file / replace denied 759 + # and run_shell_command narrowed to `sol` invocations. 760 + # Plan mode strips run_shell_command from the registry, which drove the 761 + # tool-name hallucination loop documented in 762 + # vpe/workspace/gemini-cli-tool-hallucination-research.md. Deprecated 763 + # --allowed-tools controls auto-approval, not availability, so it can't 764 + # replace the policy file for this purpose. 760 765 cmd = [ 761 766 "gemini", 762 767 "-p", ··· 764 769 "-o", 765 770 "stream-json", 766 771 "--approval-mode", 767 - approval, 772 + "yolo", 768 773 "-m", 769 774 model, 770 775 "--sandbox=none", 771 776 ] 777 + if not config.get("write"): 778 + cmd.extend(["--policy", str(_COGITATE_POLICY_PATH)]) 772 779 773 780 # Resume from previous session if continuing 774 781 if session_id:

Configure Feed

Configure Feed