204 lines
4.9 KiB
Go
204 lines
4.9 KiB
Go
package agents
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"screenmark/screenmark/agents/client"
|
|
"time"
|
|
|
|
"github.com/charmbracelet/log"
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// orchestratorPrompt is the system prompt for the orchestrator model.
// Orchestrate adds it to the chat as the system message. It names the agents
// that are exposed as tool calls and describes when each one should be used;
// the agent names here must match the tool names in MY_TOOLS.
//
// NOTE(review): the prompt says the model may decide that no agent is needed,
// while Orchestrate sets ToolChoice to "any" and offers defaultAgent for that
// case — confirm the two are meant to coexist.
const orchestratorPrompt = `
You are an Orchestrator for various AI agents.

The user will send you images and you have to determine which agents you have to call, in order to best help the user.

You might decide no agent needs to be called.

The agents are available as tool calls.

Agents available:

eventLocationAgent

Use it when you think the image contains an event or a location of any sort. This can be an event page, a map, an address or a date.

noteAgent

Use it when there is text on the screen. Any text, always use this. Use me!

contactAgent

Use it when the image contains information relating a person.

defaultAgent

When none of the above apply.

Always call agents in parallel if you need to call more than 1.

Do not call the agent if you do not think it is relevant for the image.
`
|
|
|
|
// MY_TOOLS is the JSON array of tool definitions that Orchestrate decodes and
// attaches to the model request. Each entry declares one parameterless
// function; the function names must match the handlers registered in
// NewOrchestratorAgent and the agent names used in orchestratorPrompt.
//
// The name is not idiomatic Go (MixedCaps is preferred) but is kept because
// other code refers to it.
const MY_TOOLS = `
[
	{
		"type": "function",
		"function": {
			"name": "eventLocationAgent",
			"description": "Uses the event location agent",
			"parameters": {
				"type": "object",
				"properties": {},
				"required": []
			}
		}
	},
	{
		"type": "function",
		"function": {
			"name": "noteAgent",
			"description": "Uses the note agent",
			"parameters": {
				"type": "object",
				"properties": {},
				"required": []
			}
		}
	},
	{
		"type": "function",
		"function": {
			"name": "contactAgent",
			"description": "Uses the contact/people agent",
			"parameters": {
				"type": "object",
				"properties": {},
				"required": []
			}
		}
	},
	{
		"type": "function",
		"function": {
			"name": "defaultAgent",
			"description": "Used when you dont think its a good idea to call other agents",
			"parameters": {
				"type": "object",
				"properties": {},
				"required": []
			}
		}
	}
]`
|
|
|
|
type OrchestratorAgent struct {
|
|
client client.AgentClient
|
|
|
|
log log.Logger
|
|
}
|
|
|
|
// Status is the value every orchestrator tool handler returns as its result.
type Status struct {
	// Ok is set to true by all handlers in this file. JSON key: "ok".
	Ok bool `json:"ok"`
}
|
|
|
|
// TODO: the primary function of the agent could be extracted outwards.
|
|
// This is basically the same function as we have in the `event_location_agent.go`
|
|
func (agent OrchestratorAgent) Orchestrate(userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error {
|
|
toolChoice := "any"
|
|
|
|
var tools any
|
|
err := json.Unmarshal([]byte(MY_TOOLS), &tools)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
request := client.AgentRequestBody{
|
|
Model: "pixtral-12b-2409",
|
|
Temperature: 0.3,
|
|
ResponseFormat: client.ResponseFormat{
|
|
Type: "text",
|
|
},
|
|
ToolChoice: &toolChoice,
|
|
Tools: &tools,
|
|
|
|
EndToolCall: "defaultAgent",
|
|
|
|
Chat: &client.Chat{
|
|
Messages: make([]client.ChatMessage, 0),
|
|
},
|
|
}
|
|
|
|
request.Chat.AddSystem(orchestratorPrompt)
|
|
request.Chat.AddImage(imageName, imageData)
|
|
|
|
res, err := agent.client.Request(&request)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
fmt.Println(res)
|
|
|
|
toolHandlerInfo := client.ToolHandlerInfo{
|
|
ImageId: imageId,
|
|
UserId: userId,
|
|
}
|
|
|
|
return agent.client.ToolLoop(toolHandlerInfo, &request)
|
|
}
|
|
|
|
func NewOrchestratorAgent(eventLocationAgent EventLocationAgent, noteAgent NoteAgent, contactAgent ContactAgent, imageName string, imageData []byte) (OrchestratorAgent, error) {
|
|
agent, err := client.CreateAgentClient(log.NewWithOptions(os.Stdout, log.Options{
|
|
ReportTimestamp: true,
|
|
TimeFormat: time.Kitchen,
|
|
Prefix: "Orchestrator 🎼",
|
|
}))
|
|
|
|
if err != nil {
|
|
return OrchestratorAgent{}, err
|
|
}
|
|
|
|
agent.ToolHandler.AddTool("eventLocationAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
|
|
// We need a way to keep track of this async?
|
|
// Probably just a DB, because we don't want to wait. The orchistrator shouldnt wait for this stuff to finish.
|
|
|
|
go eventLocationAgent.GetLocations(info.UserId, info.ImageId, imageName, imageData)
|
|
|
|
return Status{
|
|
Ok: true,
|
|
}, nil
|
|
})
|
|
|
|
agent.ToolHandler.AddTool("noteAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
|
|
go noteAgent.GetNotes(info.UserId, info.ImageId, imageName, imageData)
|
|
|
|
return Status{
|
|
Ok: true,
|
|
}, nil
|
|
})
|
|
|
|
agent.ToolHandler.AddTool("contactAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
|
|
go contactAgent.GetContacts(info.UserId, info.ImageId, imageName, imageData)
|
|
|
|
return Status{
|
|
Ok: true,
|
|
}, nil
|
|
})
|
|
|
|
agent.ToolHandler.AddTool("defaultAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
|
|
// To nothing
|
|
|
|
return Status{
|
|
Ok: true,
|
|
}, errors.New("Finished! Kinda bad return type but...")
|
|
})
|
|
|
|
return OrchestratorAgent{
|
|
client: agent,
|
|
}, nil
|
|
}
|