NOTE(review): this patch was re-flowed from a whitespace-collapsed copy. The
leading indentation of the agent.go/main.go context lines is a best guess and
the index hashes are those of the original commit - confirm the context and
apply with `git apply --ignore-whitespace`.

diff --git a/backend/agents/agent.go b/backend/agents/agent.go
index 231037b..30aa9c1 100644
--- a/backend/agents/agent.go
+++ b/backend/agents/agent.go
@@ -36,6 +36,8 @@ type AgentRequestBody struct {
 	Tools *any `json:"tools,omitempty"`
 	ToolChoice *string `json:"tool_choice,omitempty"`
 
+	// ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"`
+
 	AgentMessages
 }
 
diff --git a/backend/agents/orchestrator.go b/backend/agents/orchestrator.go
new file mode 100644
index 0000000..a3d4934
--- /dev/null
+++ b/backend/agents/orchestrator.go
@@ -0,0 +1,144 @@
+package agents
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/google/uuid"
+)
+
+// orchestratorPrompt is the system prompt for the orchestrator model. It lists
+// the agents, which are exposed to the model as tool calls (see MY_TOOLS).
+const orchestratorPrompt = `
+You are an Orchestrator for various AI agents.
+
+The user will send you images and you have to determine which agents you have to call, in order to best help the user.
+
+You might decide no agent needs to be called.
+
+The agents are available as tool calls.
+
+Agents available:
+
+eventLocationAgent
+
+Use it when you think the image contains an event or a location of any sort. This can be an event page, a map, an address or a date.
+
+noteAgent
+
+Use it when there is text on the screen. Any text, always use this. Use me!
+
+defaultAgent
+
+When none of the above apply.
+
+Always call agents in parallel if you need to call more than 1.
+`
+
+// MY_TOOLS is the JSON tool list attached to every orchestrator request: one
+// parameterless function per agent named in orchestratorPrompt.
+const MY_TOOLS = `
+[
+  {
+    "type": "function",
+    "function": {
+      "name": "eventLocationAgent",
+      "description": "Uses the event location agent",
+      "parameters": {
+        "type": "object",
+        "properties": {},
+        "required": []
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "noteAgent",
+      "description": "Uses the note agent",
+      "parameters": {
+        "type": "object",
+        "properties": {},
+        "required": []
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "defaultAgent",
+      "description": "Used when you dont think its a good idea to call other agents",
+      "parameters": {
+        "type": "object",
+        "properties": {},
+        "required": []
+      }
+    }
+  }
+]`
+
+// OrchestratorAgent asks a model, through client, which agents should handle
+// an image.
+type OrchestratorAgent struct {
+	client AgentClient
+}
+
+// Orchestrate sends the image to the model together with the orchestrator
+// prompt and the tool list from MY_TOOLS, then prints the raw response to
+// stdout.
+//
+// userId and imageId are accepted but not used yet. An error is returned when
+// MY_TOOLS is not valid JSON, when the system prompt cannot be added, or when
+// the client request fails.
+func (agent OrchestratorAgent) Orchestrate(userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error {
+	toolChoice := "any"
+
+	var tools any
+	if err := json.Unmarshal([]byte(MY_TOOLS), &tools); err != nil {
+		return fmt.Errorf("parsing orchestrator tool definitions: %w", err)
+	}
+
+	request := AgentRequestBody{
+		Model:       "pixtral-12b-2409",
+		Temperature: 0.3,
+		ResponseFormat: ResponseFormat{
+			Type: "text",
+		},
+		ToolChoice: &toolChoice,
+		Tools:      &tools,
+	}
+
+	if err := request.AddSystem(orchestratorPrompt); err != nil {
+		return fmt.Errorf("adding orchestrator system prompt: %w", err)
+	}
+
+	// NOTE(review): any value AddImage returns is discarded here - confirm it
+	// cannot fail.
+	request.AddImage(imageName, imageData)
+
+	resp, err := agent.client.Request(&request)
+	if err != nil {
+		return fmt.Errorf("requesting orchestration for image %q: %w", imageName, err)
+	}
+
+	// The response is only printed for now; nothing is dispatched from it.
+	fmt.Println(resp)
+
+	return nil
+}
+
+// NewOrchestratorAgent builds an OrchestratorAgent around a client created
+// with CreateAgentClient(orchestratorPrompt).
+//
+// NOTE(review): Orchestrate also adds orchestratorPrompt to each request -
+// verify the prompt is not sent twice.
+func NewOrchestratorAgent() (OrchestratorAgent, error) {
+	agent, err := CreateAgentClient(orchestratorPrompt)
+	if err != nil {
+		return OrchestratorAgent{}, fmt.Errorf("creating orchestrator agent client: %w", err)
+	}
+
+	return OrchestratorAgent{
+		client: agent,
+	}, nil
+}
diff --git a/backend/main.go b/backend/main.go
index 60d8079..f1b0530 100644
--- a/backend/main.go
+++ b/backend/main.go
@@ -120,6 +120,11 @@ func main() {
 			panic(err)
 		}
 
+		orchestrator, err := agents.NewOrchestratorAgent()
+		if err != nil {
+			panic(err)
+		}
+
 		image, err := imageModel.GetToProcessWithData(ctx, imageId)
 		if err != nil {
 			log.Println("Failed to GetToProcessWithData")
@@ -134,6 +139,14 @@ func main() {
 			return
 		}
 
+		if err := orchestrator.Orchestrate(image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image); err != nil {
+			log.Println("Failed to orchestrate image:", err)
+		}
+
+		// NOTE(review): this early return skips the agent calls below,
+		// presumably temporary while the orchestrator is being tried out.
+		return
+
 		// TODO: this can very much be parallel
 		log.Println("Calling locationAgent!")
 