package agents import ( "screenmark/screenmark/agents/client" "github.com/charmbracelet/log" ) const orchestratorPrompt = ` **Role:** You are an Orchestrator AI responsible for analyzing images provided by the user. **Primary Task:** Examine the input image and determine which specialized AI agent(s), available as tool calls, should be invoked to process the relevant information within the image, or if no specialized processing is needed. Your goal is to either extract and structure useful information for the user by selecting the most appropriate tool(s) or explicitly indicate that no specific action is required. **Input:** User-provided image. **Analysis Process & Decision Logic:** 1. **Analyze Image Content:** Scrutinize the image for distinct types of information: * General text/writing (including code, formulas) * Information about a person or contact details * Information about a place, location, or address * Information about an event * Content that doesn't fit any specific category or lacks actionable information. 2. **Agent Selection - Determine ALL that apply:** * **contactAgent:** Is there information specifically related to a person or their contact details (e.g., business card, name/email/phone)? If YES, select contactAgent. * **locationAgent:** Is there information specifically identifying a place, location, or address (e.g., map, street sign, address text)? If YES, select locationAgent. * **eventAgent:** Is there information specifically related to an event (e.g., invitation, poster with date/time, schedule)? If YES, select eventAgent. * **noteAgent** Does the image contain *any* text/writing (including code, formulas)? **Available Agents (Tools):** * **noteAgent**: Use when there is any text on the image, this can be code/text/formulas any writing. * **contactAgent**: Use when the image contains some person or contact. * **locationAgent**: Use when the image contains some place, location or address. * **eventAgent**: Use when the image contains some event. * **noAgent**: Call this when you are done working on this image. **Execution Rules:** * Call all applicable specialized agents (noteAgent, contactAgent, locationAgent, eventAgent) simultaneously (in parallel). ` const orchestratorTools = ` [ { "type": "function", "function": { "name": "noteAgent", "description": "Extracts general textual content like handwritten notes, paragraphs in documents, presentation slides, code snippets, or mathematical formulas. Use this for significant text that isn't primarily contact details, an address, or specific event information.", "parameters": { "type": "object", "properties": {}, "required": [] } } }, { "type": "function", "function": { "name": "contactAgent", "description": "Extracts personal contact information. Use when the image clearly shows details like names, phone numbers, email addresses, job titles, or company names, especially from sources like business cards, email signatures, or contact lists.", "parameters": { "type": "object", "properties": {}, "required": [] } } }, { "type": "function", "function": { "name": "locationAgent", "description": "Identifies and extracts specific geographic locations or addresses. Use for content like street addresses on mail or signs, place names (e.g., restaurant, shop), map snippets, or recognizable landmarks.", "parameters": { "type": "object", "properties": {}, "required": [] } } }, { "type": "function", "function": { "name": "eventAgent", "description": "Extracts details related to scheduled events, appointments, or specific occasions. Use when the image contains information like event titles, dates, times, venues, agendas, or descriptions, typically found on invitations, posters, calendar entries, or schedules.", "parameters": { "type": "object", "properties": {}, "required": [] } } }, { "type": "function", "function": { "name": "noAgent", "description": "Extracts details related to scheduled events, appointments, or specific occasions. Use when the image contains information like event titles, dates, times, venues, agendas, or descriptions, typically found on invitations, posters, calendar entries, or schedules.", "parameters": { "type": "object", "properties": {}, "required": [] } } } ] ` type OrchestratorAgent struct { Client client.AgentClient log log.Logger } type Status struct { Ok bool `json:"ok"` } func NewOrchestratorAgent(log *log.Logger, noteAgent NoteAgent, contactAgent client.AgentClient, locationAgent client.AgentClient, eventAgent client.AgentClient, imageName string, imageData []byte) client.AgentClient { agent := client.CreateAgentClient(client.CreateAgentClientOptions{ SystemPrompt: orchestratorPrompt, JsonTools: orchestratorTools, Log: log, EndToolCall: "noAgent", }) agent.ToolHandler.AddTool("noteAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { // go noteAgent.GetNotes(info.UserId, info.ImageId, imageName, imageData) return "noteAgent called successfully", nil }) agent.ToolHandler.AddTool("contactAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { go contactAgent.RunAgent(info.UserId, info.ImageId, imageName, imageData) return "contactAgent called successfully", nil }) agent.ToolHandler.AddTool("locationAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { go locationAgent.RunAgent(info.UserId, info.ImageId, imageName, imageData) return "locationAgent called successfully", nil }) agent.ToolHandler.AddTool("eventAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { go eventAgent.RunAgent(info.UserId, info.ImageId, imageName, imageData) return "eventAgent called successfully", nil }) agent.ToolHandler.AddTool("noAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { return "ok", nil }) return agent }