Haystack/backend/agents/orchestrator.go
John Costa b0f4a45c40 feat(contact-agent): working contact agent
Built this in under 20 minutes. Getting some really good agents
2025-04-11 21:12:06 +01:00

194 lines
4.7 KiB
Go

package agents
import (
"encoding/json"
"errors"
"fmt"
"screenmark/screenmark/agents/client"
"github.com/google/uuid"
)
// orchestratorPrompt is the system message that Orchestrate adds to the chat
// before attaching the image. It describes the four agents by the same names
// that are declared as tools in MY_TOOLS (eventLocationAgent, noteAgent,
// contactAgent, defaultAgent) and tells the model when each should be called.
//
// The text is sent to the model verbatim, so any edit to it changes runtime
// behavior.
const orchestratorPrompt = `
You are an Orchestrator for various AI agents.
The user will send you images and you have to determine which agents you have to call, in order to best help the user.
You might decide no agent needs to be called.
The agents are available as tool calls.
Agents available:
eventLocationAgent
Use it when you think the image contains an event or a location of any sort. This can be an event page, a map, an address or a date.
noteAgent
Use it when there is text on the screen. Any text, always use this. Use me!
contactAgent
Use it when the image contains information relating a person.
defaultAgent
When none of the above apply.
Always call agents in parallel if you need to call more than 1.
Do not call the agent if you do not think it is relevant for the image.
`
// MY_TOOLS is a JSON array of function-tool definitions, one per agent:
// eventLocationAgent, noteAgent, contactAgent and defaultAgent. Every tool
// declares an empty parameter object, so the model only chooses which tools
// to call and passes no arguments.
//
// Orchestrate unmarshals this string and attaches the result to the request
// as its Tools field. The tool names must stay in sync with the handlers
// registered in NewOrchestratorAgent and with the names used in
// orchestratorPrompt.
//
// NOTE(review): Go convention would spell this in MixedCaps; the name is left
// unchanged because it is exported and may be referenced elsewhere.
const MY_TOOLS = `
[
{
"type": "function",
"function": {
"name": "eventLocationAgent",
"description": "Uses the event location agent",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
},
{
"type": "function",
"function": {
"name": "noteAgent",
"description": "Uses the note agent",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
},
{
"type": "function",
"function": {
"name": "contactAgent",
"description": "Uses the contact/people agent",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
},
{
"type": "function",
"function": {
"name": "defaultAgent",
"description": "Used when you dont think its a good idea to call other agents",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
}
]`
// OrchestratorAgent holds the agent client that Orchestrate uses to send the
// model request and to run the tool loop. Values are built by
// NewOrchestratorAgent, which registers the tool handlers on the client.
type OrchestratorAgent struct {
// client issues the request and runs the tool loop in Orchestrate.
client client.AgentClient
}
// Status is the acknowledgement payload returned by every tool handler that
// NewOrchestratorAgent registers.
type Status struct {
// Ok is serialized under the JSON key "ok".
Ok bool `json:"ok"`
}
// TODO: the primary function of the agent could be extracted outwards.
// This is basically the same function as we have in the `event_location_agent.go`

// Orchestrate asks the model which agents should handle an image and then
// runs the resulting tool calls.
//
// It decodes MY_TOOLS into the request's tool list, builds a chat made of
// orchestratorPrompt as the system message followed by the image, sends the
// request through agent.client.Request, prints the response to stdout, and
// finally hands the same request to agent.client.ToolLoop together with the
// user and image IDs so the tool handlers receive them.
//
// Parameters:
//   - userId, imageId: forwarded to the tool handlers via ToolHandlerInfo.
//   - imageName, imageData: the image attached to the chat.
//
// It returns a wrapped error if the tool definitions cannot be decoded or the
// initial request fails; otherwise it returns whatever ToolLoop returns.
func (agent OrchestratorAgent) Orchestrate(userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error {
	var tools any
	if err := json.Unmarshal([]byte(MY_TOOLS), &tools); err != nil {
		return fmt.Errorf("decoding orchestrator tool definitions: %w", err)
	}

	// NOTE(review): "any" presumably forces the model to call at least one
	// tool — confirm against the provider's API.
	toolChoice := "any"

	request := client.AgentRequestBody{
		Model:       "pixtral-12b-2409",
		Temperature: 0.3,
		ResponseFormat: client.ResponseFormat{
			Type: "text",
		},
		ToolChoice: &toolChoice,
		Tools:      &tools,
		// NOTE(review): presumably the tool loop stops once this tool is
		// called — confirm in client.ToolLoop.
		EndToolCall: "defaultAgent",
		Chat: &client.Chat{
			Messages: make([]client.ChatMessage, 0),
		},
	}
	request.Chat.AddSystem(orchestratorPrompt)
	request.Chat.AddImage(imageName, imageData)

	res, err := agent.client.Request(&request)
	if err != nil {
		return fmt.Errorf("requesting orchestrator tool calls: %w", err)
	}
	fmt.Println(res)

	toolHandlerInfo := client.ToolHandlerInfo{
		ImageId: imageId,
		UserId:  userId,
	}

	// The ToolLoop result is passed through untouched so that anything a tool
	// handler reports reaches the caller exactly as produced.
	return agent.client.ToolLoop(toolHandlerInfo, &request)
}
// NewOrchestratorAgent creates an agent client and registers one tool handler
// on it for each of the names "eventLocationAgent", "noteAgent",
// "contactAgent" and "defaultAgent", then returns an OrchestratorAgent
// wrapping that client.
//
// The first three handlers start the matching agent's work in a new goroutine
// with the supplied imageName and imageData plus the user and image IDs taken
// from the handler info, and immediately answer with Status{Ok: true}. The
// goroutines are not waited on and their results are not observed here.
//
// The "defaultAgent" handler does no work and returns Status{Ok: true}
// together with a non-nil error.
//
// If the client cannot be created, the zero OrchestratorAgent and the
// creation error are returned.
func NewOrchestratorAgent(eventLocationAgent EventLocationAgent, noteAgent NoteAgent, contactAgent ContactAgent, imageName string, imageData []byte) (OrchestratorAgent, error) {
	agentClient, err := client.CreateAgentClient()
	if err != nil {
		return OrchestratorAgent{}, err
	}

	// Every handler replies with the same acknowledgement payload.
	accepted := Status{Ok: true}

	agentClient.ToolHandler.AddTool("eventLocationAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
		// We need a way to keep track of this async?
		// Probably just a DB, because we don't want to wait. The orchestrator
		// shouldn't wait for this stuff to finish.
		go eventLocationAgent.GetLocations(info.UserId, info.ImageId, imageName, imageData)

		return accepted, nil
	})

	agentClient.ToolHandler.AddTool("noteAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
		go noteAgent.GetNotes(info.UserId, info.ImageId, imageName, imageData)

		return accepted, nil
	})

	agentClient.ToolHandler.AddTool("contactAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
		go contactAgent.GetContacts(info.UserId, info.ImageId, imageName, imageData)

		return accepted, nil
	})

	agentClient.ToolHandler.AddTool("defaultAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
		// Do nothing.
		// NOTE(review): the error looks like a "stop" signal for the tool
		// loop rather than a real failure — confirm in client.ToolLoop.
		return accepted, errors.New("Finished! Kinda bad return type but...")
	})

	return OrchestratorAgent{client: agentClient}, nil
}