wip(orchestrator): basic scaffolding for the agent

This commit is contained in:
2025-04-04 20:40:31 +01:00
parent fe7c92b622
commit 8a165c2042
3 changed files with 135 additions and 0 deletions

View File

@ -36,6 +36,8 @@ type AgentRequestBody struct {
Tools *any `json:"tools,omitempty"` Tools *any `json:"tools,omitempty"`
ToolChoice *string `json:"tool_choice,omitempty"` ToolChoice *string `json:"tool_choice,omitempty"`
// ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"`
AgentMessages AgentMessages
} }

View File

@ -0,0 +1,124 @@
package agents
import (
"encoding/json"
"fmt"
"github.com/google/uuid"
)
// orchestratorPrompt is the system prompt for the orchestrator model. It tells
// the model to pick, per image, which agents to invoke via tool calls and
// describes the three agents it may choose from: eventLocationAgent, noteAgent
// and defaultAgent.
const orchestratorPrompt = `
You are an Orchestrator for various AI agents.
The user will send you images and you have to determine which agents you have to call, in order to best help the user.
You might decide no agent needs to be called.
The agents are available as tool calls.
Agents available:
eventLocationAgent
Use it when you think the image contains an event or a location of any sort. This can be an event page, a map, an address or a date.
noteAgent
Use it when there is text on the screen. Any text, always use this. Use me!
defaultAgent
When none of the above apply.
Always call agents in parallel if you need to call more than 1.
`
// MY_TOOLS is the JSON array of tool (function) definitions sent with the
// orchestrator request. It declares three functions, eventLocationAgent,
// noteAgent and defaultAgent, each taking an empty parameter object. The
// function names match the agent names listed in orchestratorPrompt.
//
// NOTE(review): Go naming convention is MixedCaps rather than ALL_CAPS (for
// example orchestratorTools); renaming requires updating every reference.
const MY_TOOLS = `
[
{
"type": "function",
"function": {
"name": "eventLocationAgent",
"description": "Uses the event location agent",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
},
{
"type": "function",
"function": {
"name": "noteAgent",
"description": "Uses the note agent",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
},
{
"type": "function",
"function": {
"name": "defaultAgent",
"description": "Used when you dont think its a good idea to call other agents",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
}
]`
// OrchestratorAgent sends an image to the model together with the
// orchestrator prompt and tool definitions so the model can choose which
// agents should handle the image.
type OrchestratorAgent struct {
// client is used to send the assembled request to the model.
client AgentClient
}
// Orchestrate asks the model which agents should handle the given image.
//
// It builds a request holding the orchestrator system prompt, the tool
// definitions decoded from MY_TOOLS and the image itself, sends it through the
// agent's client and prints the raw response to standard output. Acting on the
// model's tool calls is not implemented yet.
//
// userId and imageId are accepted but not used by the current implementation.
// imageName and imageData are passed unchanged to the request's AddImage.
//
// A non-nil error is returned, wrapped with the step that failed, when the
// tool definitions cannot be decoded, the system prompt cannot be added, or
// the request to the model fails. The underlying error stays reachable through
// errors.Is and errors.As.
func (o OrchestratorAgent) Orchestrate(userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error {
	var tools any
	if err := json.Unmarshal([]byte(MY_TOOLS), &tools); err != nil {
		return fmt.Errorf("decoding orchestrator tool definitions: %w", err)
	}

	// NOTE(review): "any" presumably forces the model to call at least one
	// tool; confirm against the provider's tool_choice documentation.
	toolChoice := "any"

	request := AgentRequestBody{
		Model:       "pixtral-12b-2409",
		Temperature: 0.3,
		ResponseFormat: ResponseFormat{
			Type: "text",
		},
		ToolChoice: &toolChoice,
		Tools:      &tools,
	}

	if err := request.AddSystem(orchestratorPrompt); err != nil {
		return fmt.Errorf("adding orchestrator system prompt: %w", err)
	}

	// NOTE(review): if AddImage returns an error it is dropped here; confirm
	// its signature and handle the result like AddSystem's if so.
	request.AddImage(imageName, imageData)

	resp, err := o.client.Request(&request)
	if err != nil {
		return fmt.Errorf("requesting orchestration for image %q: %w", imageName, err)
	}

	// Work in progress: the response is only printed for inspection.
	fmt.Println(resp)
	return nil
}
// NewOrchestratorAgent returns an OrchestratorAgent whose client is obtained
// from CreateAgentClient using the orchestrator prompt. When the client cannot
// be created, the zero OrchestratorAgent is returned together with the error.
func NewOrchestratorAgent() (OrchestratorAgent, error) {
	var orchestrator OrchestratorAgent

	client, err := CreateAgentClient(orchestratorPrompt)
	if err != nil {
		return orchestrator, err
	}

	orchestrator.client = client
	return orchestrator, nil
}

View File

@ -120,6 +120,11 @@ func main() {
panic(err) panic(err)
} }
orchestrator, err := agents.NewOrchestratorAgent()
if err != nil {
panic(err)
}
image, err := imageModel.GetToProcessWithData(ctx, imageId) image, err := imageModel.GetToProcessWithData(ctx, imageId)
if err != nil { if err != nil {
log.Println("Failed to GetToProcessWithData") log.Println("Failed to GetToProcessWithData")
@ -134,6 +139,10 @@ func main() {
return return
} }
orchestrator.Orchestrate(image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image)
return
// TODO: this can very much be parallel // TODO: this can very much be parallel
log.Println("Calling locationAgent!") log.Println("Calling locationAgent!")