Files
dotfiles/home-manager/progs/opencode.nix
2026-01-31 00:53:29 -05:00

161 lines
5.5 KiB
Nix

{
config,
lib,
pkgs,
inputs,
...
}:
let
# Shared model aliases, so the model behind a whole tier can be swapped in one place.
#
# opus-model: used as the default `settings.model` and by several agents/categories.
opus-model = "openrouter/moonshotai/kimi-k2.5";
# haiku-model: the model used in place of haiku (the small/fast tier);
# glm 4.7-flash is one example of a suitable choice.
haiku-model = "openrouter/z-ai/glm-4.7-flash";
# Settings for the oh-my-opencode plugin: the model (and, for some entries, a
# variant) assigned to each agent and each task category. The attrset is
# serialised with builtins.toJSON where it is consumed.
ohMyOpencodeConfig =
  let
    # Model ids that appear more than once below.
    gpt52 = "openrouter/openai/gpt-5.2";
    geminiPro = "openrouter/google/gemini-3-pro";
    geminiFlash = "openrouter/google/gemini-3-flash-preview";

    # Builds `{ model = <id>; }`.
    plain = model: { inherit model; };
    # Builds `{ model = <id>; variant = <variant>; }`.
    tuned = model: variant: { inherit model variant; };
  in
  {
    "$schema" = "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json";

    agents = {
      # Entries on the shared opus-model alias.
      sisyphus = plain opus-model;
      prometheus = plain opus-model;
      metis = plain opus-model;
      atlas = plain opus-model;

      # Entries on the shared haiku-model alias.
      librarian = plain haiku-model;
      explore = plain haiku-model;

      # Entries pinned to a specific model.
      oracle = tuned gpt52 "high";
      momus = tuned gpt52 "medium";
      multimodal-looker = plain geminiFlash;
    };

    categories = {
      visual-engineering = plain geminiPro;
      artistry = tuned geminiPro "max";
      ultrabrain = tuned "openrouter/openai/gpt-5.2-codex" "xhigh";
      writing = plain geminiFlash;
      quick = plain haiku-model;
      "unspecified-low" = plain opus-model;
      "unspecified-high" = plain opus-model;
    };
  };
in
{
# Add the oh-my-opencode package from the `oh-my-opencode` input, selecting
# the build that matches the host platform's system string.
home.packages =
  let
    inherit (pkgs.stdenv.hostPlatform) system;
  in
  [ inputs.oh-my-opencode.packages.${system}.oh-my-opencode ];
# Write ohMyOpencodeConfig, rendered as JSON, to the XDG config file
# opencode/oh-my-opencode.json.
xdg.configFile."opencode/oh-my-opencode.json" = {
  text = builtins.toJSON ohMyOpencodeConfig;
};
# opencode itself: package selection, the global rules text handed to the
# agent, and the contents of its settings (models, plugin, provider).
programs.opencode = {
  enable = true;
  # Select the package by hostPlatform (the platform the program runs on), the
  # same selector used for the oh-my-opencode package in home.packages.
  # targetPlatform is only meaningful for compilers and can differ from
  # hostPlatform in cross builds.
  package = inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.opencode;
  # Free-form instructions for the agent. This is an indented ('' ... '')
  # string: the common leading whitespace is stripped, so the text below is
  # emitted starting at column 0.
  rules = ''
    You are an intelligent and observant agent.
    If instructed to commit, disable gpg signing.
    You are on nixOS, if you don't have access to a tool, you can access it via the `nix-shell` command.
    ## Think deeply about everything.
    When given a problem, break it down, abstract it out, understand the fundamentals, then solve it in the real world.
    ## Misc
    For long-running commands, make sure you set the timeout of the Bash tool provided to a larger value.
    Do NOT read secret files. Do not directly read files that are api keys or are contextually sensitive.
    ## Behavior
    Do not be sycophantic in your responses.
    Do not use emojis unless explicitly asked to. This includes in code.
    Use Test Driven Development methodology.
    ## Nix
    For using `nix build` append `-L` to get better visibility into the logs.
    If you get an error that a file can't be found, always try to `git add` the file before trying other troubleshooting steps.
    ## Android UI Interaction Workflow Summary
    1. Taking Screenshots
    adb exec-out screencap -p > /tmp/screen.png
    Captures the current screen state as a PNG image.
    2. Analyzing Screenshots
    I delegate screenshot analysis to an explore agent rather than analyzing images directly:
    mcp_task(subagent_type="explore", prompt="Analyze /tmp/screen.png. What screen is this? What elements are visible?")
    The agent describes the UI, identifies elements, and estimates Y coordinates.
    3. Getting Precise Element Coordinates
    UI Automator dump - extracts the full UI hierarchy as XML:
    adb shell uiautomator dump /sdcard/ui.xml && adb pull /sdcard/ui.xml /tmp/ui.xml
    Then grep for specific elements:
    # Find by text
    grep -oP 'text="Login".*?bounds="[^"]*"' /tmp/ui.xml
    # Find by class
    grep -oP 'class="android.widget.EditText".*?bounds="[^"]*"' /tmp/ui.xml
    Bounds format: [left,top][right,bottom] tap center: ((left+right)/2, (top+bottom)/2)
    4. Tapping Elements
    adb shell input tap X Y
    Where X, Y are pixel coordinates from the bounds.
    5. Text Input
    adb shell input text "some_text"
    Note: Special characters need escaping (\!, \;, etc.)
    6. Other Gestures
    # Swipe/scroll
    adb shell input swipe startX startY endX endY duration_ms
    # Key events
    adb shell input keyevent KEYCODE_BACK
    adb shell input keyevent KEYCODE_ENTER
    7. WebView Limitation
    - UI Automator can see WebView content if accessibility is enabled
    - Touch events on iframe content (like Cloudflare Turnstile) often fail due to cross-origin isolation
    - Form fields in WebViews work if you get exact bounds from the UI dump
    Typical Flow
    1. Take screenshot analyze with explore agent (get rough layout)
    2. Dump UI hierarchy grep for exact element bounds
    - NEVER ASSUME COORDINATES. You must ALWAYS check first.
    - Do this before ANY tap action as elements on the screen may of changed.
    3. Calculate center coordinates from bounds
    4. Tap/interact
    5. Wait screenshot verify result
  '';
  settings = {
    theme = "opencode";
    # Default model: the shared opus-model alias.
    model = opus-model;
    # small model used for titles
    small_model = "openrouter/openai/gpt-oss-20b:free";
    autoshare = false;
    autoupdate = true;
    agent = { };
    plugin = [ "oh-my-opencode" ];
    provider = {
      openrouter = {
        models = {
          "openai/gpt-oss-20b:free" = { };
          "minimax/minimax-m2.1" = { };
          "z-ai/glm-4.7-flash" = { };
        };
        options = {
          # TODO! use agenix here instead
          # NOTE(security): interpolating a Nix path copies the referenced
          # file into the Nix store, so the API key ends up in a
          # world-readable store path. Prefer pointing `{file:...}` at a
          # runtime-decrypted secret path once agenix is wired up.
          # NOTE(review): `{file:...}` is presumably expanded by opencode when
          # it loads its config -- confirm against the opencode docs.
          apiKey = "{file:${../secrets/openrouter_api_key}}";
        };
      };
    };
  };
};
}