160 lines
5.4 KiB
Nix
160 lines
5.4 KiB
Nix
{
|
|
config,
|
|
lib,
|
|
pkgs,
|
|
inputs,
|
|
...
|
|
}:
|
|
let
|
|
# Primary model alias. Every `opus-model` reference in this module resolves
# to it: the sisyphus, prometheus, metis and atlas agents, the
# unspecified-low / unspecified-high categories, and the default `model`
# in programs.opencode.settings.
opus-model = "openrouter/moonshotai/kimi-k2.5";

# Which model should stand in for haiku? Used by the librarian and explore
# agents and by the `quick` category.
# glm 4.7-flash is another example of a model that could be used here.
haiku-model = "openrouter/z-ai/glm-4.7";
|
|
|
|
# Model assignments for the oh-my-opencode plugin. This attribute set is
# serialised with builtins.toJSON into opencode/oh-my-opencode.json by the
# xdg.configFile entry of this module.
ohMyOpencodeConfig =
  let
    # Model identifiers that are used more than once.
    gpt = "openrouter/openai/gpt-5.2";
    geminiPro = "openrouter/google/gemini-3-pro";
    geminiFlash = "openrouter/google/gemini-3-flash-preview";

    # Entry that only selects a model.
    plain = model: { inherit model; };
    # Entry that selects a model together with a variant string.
    tuned = model: variant: { inherit model variant; };
  in
  {
    "$schema" =
      "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json";

    agents = {
      sisyphus = plain opus-model;
      oracle = tuned gpt "high";
      librarian = plain haiku-model;
      explore = plain haiku-model;
      multimodal-looker = plain geminiFlash;

      prometheus = plain opus-model;
      metis = plain opus-model;
      momus = tuned gpt "medium";
      atlas = plain opus-model;
    };

    categories = {
      visual-engineering = plain geminiPro;
      ultrabrain = tuned "openrouter/openai/gpt-5.2-codex" "xhigh";
      artistry = tuned geminiPro "max";
      quick = plain haiku-model;
      "unspecified-low" = plain opus-model;
      "unspecified-high" = plain opus-model;
      writing = plain geminiFlash;
    };
  };
|
|
in
|
|
{
|
|
# Install the oh-my-opencode package exposed by the `oh-my-opencode` input,
# picking the output that matches the platform this profile runs on.
home.packages =
  let
    system = pkgs.stdenv.hostPlatform.system;
  in
  [ inputs.oh-my-opencode.packages.${system}.oh-my-opencode ];
|
|
|
|
# Write the plugin configuration as JSON to opencode/oh-my-opencode.json
# under the XDG config directory.
xdg.configFile."opencode/oh-my-opencode.json" = {
  text = builtins.toJSON ohMyOpencodeConfig;
};
|
|
|
|
programs.opencode = {
|
|
# Use the opencode build from the `llm-agents` input instead of the module's
# default package. The output is selected by `hostPlatform.system` (the
# platform the program runs on), the same selector home.packages uses for
# oh-my-opencode. `targetPlatform` only describes what a compiler emits code
# for and would pick the wrong output in a cross-compilation setup.
package = inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.opencode;
enable = true;
|
|
rules = ''
|
|
You are an intelligent and observant agent.
|
|
If instructed to commit, disable gpg signing.
|
|
You are on NixOS. If you don't have access to a tool, you can access it via the `nix-shell` command.
|
|
|
|
## Think deeply about everything.
|
|
When given a problem, break it down, abstract it out, understand the fundamentals, then solve it in the real world.
|
|
|
|
## Misc
|
|
For long-running commands, make sure you set the timeout of the Bash tool provided to a larger value.
|
|
Do NOT read secret files. Do not directly read files that are api keys or are contextually sensitive.
|
|
|
|
## Behavior
|
|
Do not be sycophantic in your responses.
|
|
Do not use emojis unless explicitly asked to. This includes in code.
|
|
Use Test Driven Development methodology.
|
|
|
|
## Nix
|
|
When using `nix build`, append `-L` to get better visibility into the logs.
|
|
If you get an error that a file can't be found, always try to `git add` the file before trying other troubleshooting steps.
|
|
|
|
|
|
## Android UI Interaction Workflow Summary
|
|
1. Taking Screenshots
|
|
adb exec-out screencap -p > /tmp/screen.png
|
|
Captures the current screen state as a PNG image.
|
|
|
|
2. Analyzing Screenshots
|
|
I delegate screenshot analysis to an explore agent rather than analyzing images directly:
|
|
mcp_task(subagent_type="explore", prompt="Analyze /tmp/screen.png. What screen is this? What elements are visible?")
|
|
The agent describes the UI, identifies elements, and estimates Y coordinates.
|
|
|
|
3. Getting Precise Element Coordinates
|
|
UI Automator dump - extracts the full UI hierarchy as XML:
|
|
adb shell uiautomator dump /sdcard/ui.xml && adb pull /sdcard/ui.xml /tmp/ui.xml
|
|
Then grep for specific elements:
|
|
# Find by text
|
|
grep -oP 'text="Login".*?bounds="[^"]*"' /tmp/ui.xml
|
|
# Find by class
|
|
grep -oP 'class="android.widget.EditText".*?bounds="[^"]*"' /tmp/ui.xml
|
|
Bounds format: [left,top][right,bottom] → tap center: ((left+right)/2, (top+bottom)/2)
|
|
|
|
4. Tapping Elements
|
|
adb shell input tap X Y
|
|
Where X, Y are pixel coordinates from the bounds.
|
|
|
|
5. Text Input
|
|
adb shell input text "some_text"
|
|
Note: Special characters need escaping (\!, \;, etc.)
|
|
|
|
6. Other Gestures
|
|
# Swipe/scroll
|
|
adb shell input swipe startX startY endX endY duration_ms
|
|
# Key events
|
|
adb shell input keyevent KEYCODE_BACK
|
|
adb shell input keyevent KEYCODE_ENTER
|
|
|
|
7. WebView Limitation
|
|
- UI Automator can see WebView content if accessibility is enabled
|
|
- Touch events on iframe content (like Cloudflare Turnstile) often fail due to cross-origin isolation
|
|
- Form fields in WebViews work if you get exact bounds from the UI dump
|
|
|
|
Typical Flow
|
|
1. Take screenshot → analyze with explore agent (get rough layout)
|
|
2. Dump UI hierarchy → grep for exact element bounds
|
|
- NEVER ASSUME COORDINATES. You must ALWAYS check first.
|
|
- Do this before ANY tap action as elements on the screen may have changed.
|
|
3. Calculate center coordinates from bounds
|
|
4. Tap/interact
|
|
5. Wait → screenshot → verify result
|
|
'';
|
|
# opencode settings: appearance and update behaviour, the default and
# title models, the oh-my-opencode plugin, and the OpenRouter provider.
settings = {
  theme = "opencode";
  autoshare = false;
  autoupdate = true;

  agent = { };
  plugin = [ "oh-my-opencode" ];

  # Default model; shares the alias used for the main oh-my-opencode agents.
  model = opus-model;
  # small model used for titles
  small_model = "openrouter/openai/gpt-oss-20b:free";

  provider.openrouter = {
    models = {
      "minimax/minimax-m2.1" = { };
      "openai/gpt-oss-20b:free" = { };
    };

    # TODO! use agenix here instead
    # NOTE(review): interpolating a path literal makes Nix copy that file
    # into the Nix store and substitute the store path, so the key file is
    # readable by anyone who can read the store. Confirm and move to a
    # runtime secret path when agenix is wired up.
    # The "{file:...}" wrapper is presumably resolved by opencode when it
    # loads the config; verify against the opencode docs.
    options.apiKey = "{file:${../secrets/openrouter_api_key}}";
  };
};
|
|
};
|
|
}
|