feat: event agent calling location agent about location ID

This is pretty nice: agents can now spawn other agents, which is what lets the event agent ask the location agent for the location ID of an image. The pattern might be a little fragile, though.
2025-04-16 14:43:07 +01:00
parent aacecfffac
commit fa486153b4
5 changed files with 60 additions and 11 deletions
--- a/backend/agents/client/client.go
+++ b/backend/agents/client/client.go
@ -73,6 +73,8 @@ type AgentClient struct {

 	Log *log.Logger

+	Reply string
+
 	Do func(req *http.Request) (*http.Response, error)
 }

@ -186,7 +188,7 @@ func (client AgentClient) ToolLoop(info ToolHandlerInfo, req *AgentRequestBody)

 var FinishedCall = errors.New("Last tool tool was called")

-func (client AgentClient) Process(info ToolHandlerInfo, req *AgentRequestBody) error {
+func (client *AgentClient) Process(info ToolHandlerInfo, req *AgentRequestBody) error {
 	var err error

 	message, err := req.Chat.GetLatest()
@ -211,6 +213,10 @@ func (client AgentClient) Process(info ToolHandlerInfo, req *AgentRequestBody) e

 		toolResponse := client.ToolHandler.Handle(info, toolCall)

+		if toolCall.Function.Name == "reply" {
+			client.Reply = toolCall.Function.Arguments
+		}
+
 		client.Log.SetLevel(log.DebugLevel)
 		client.Log.Debugf("Response: %s", toolResponse.Content)

@ -249,9 +255,10 @@ func (client AgentClient) RunAgent(systemPrompt string, jsonTools string, endToo
 	}

 	toolHandlerInfo := ToolHandlerInfo{
-		ImageId: imageId,
-		UserId:  userId,
-		Image:   &imageData,
+		ImageId:   imageId,
+		ImageName: imageName,
+		UserId:    userId,
+		Image:     &imageData,
 	}

 	return client.ToolLoop(toolHandlerInfo, &request)
--- a/backend/agents/client/tools.go
+++ b/backend/agents/client/tools.go
@ -8,8 +8,9 @@ import (
 )

 type ToolHandlerInfo struct {
-	UserId  uuid.UUID
-	ImageId uuid.UUID
+	UserId    uuid.UUID
+	ImageId   uuid.UUID
+	ImageName string

 	// Pointer because we don't want to copy this around too much.
 	Image *[]byte
--- a/backend/agents/event_agent.go
+++ b/backend/agents/event_agent.go
@ -27,6 +27,9 @@ Lists the users already existing events.
 createEvent
 Use this to create a new events.

+getEventLocationId
+Use this if the image contains a location or place. This tool will return the locationId.
+
 finish
 Call when there is nothing else to do.
 `
@ -68,6 +71,18 @@ const eventTools = `
                "required": ["name"]
            }
        }
+    },
+	{
+        "type": "function",
+        "function": {
+            "name": "getEventLocationId",
+            "description": "Get the ID of the location on the image, only use if the event contains a location or place.",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
    },
 	{
        "type": "function",
@ -180,9 +195,11 @@ func NewEventAgent(eventsModel models.EventModel, locationAgent LocationAgent) (
 		return "Saved", nil
 	})

-	agentClient.ToolHandler.AddTool("getLocationId", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
-		// locationAgent.client.RunAgent()
-		return "no location found", nil
+	agentClient.ToolHandler.AddTool("getEventLocationId", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
+		query := "Can you get me the ID of the location present in this image?"
+		locationAgent.client.RunAgent(locationPrompt, locationTools, "finish", &query, info.UserId, info.ImageId, info.ImageName, *info.Image)
+
+		return locationAgent.client.Reply, nil
 	})

 	return agent, nil
--- a/backend/agents/location_agent.go
+++ b/backend/agents/location_agent.go
@ -27,6 +27,9 @@ Lists the users already existing locations.
 createLocation
 Use this to create a new location, when you don't see a matching one from listLocations call.

+reply
+Use this only if the user has asked a question about a location.
+
 finish
 Call when there is nothing else to do.
 `
@ -63,6 +66,22 @@ const locationTools = `
                "required": ["name"]
            }
        }
+    },
+	{
+        "type": "function",
+        "function": {
+            "name": "reply",
+            "description": "Reply to a user query, only if the user has asked something",
+            "parameters": {
+                "type": "object",
+                "properties": {
+					"locationId": {
+						"type": "string"
+					}
+				},
+                "required": ["locationId"]
+            }
+        }
    },
 	{
        "type": "function",
@ -157,5 +176,10 @@ func NewLocationAgent(locationModel models.LocationModel) (LocationAgent, error)
 		return "Saved", nil
 	})

+	agentClient.ToolHandler.AddTool("reply", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
+		agent.client.Log.Debug(args)
+		return "ok", nil
+	})
+
 	return agent, nil
 }
--- a/backend/events.go
+++ b/backend/events.go
@ -57,7 +57,7 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) {
 					panic(err)
 				}

-				eventAgent, err := agents.NewEventAgent(eventModel)
+				eventAgent, err := agents.NewEventAgent(eventModel, locationAgent)
 				if err != nil {
 					panic(err)
 				}
@ -82,7 +82,7 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) {

 				// Still need to find some way to hide this complexity away.
 				// I don't think wrapping agents in structs actually works too well.
-				err = orchestrator.Client.RunAgent(agents.OrchestratorPrompt, agents.OrchestratorTools, "noAction", image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image)
+				err = orchestrator.Client.RunAgent(agents.OrchestratorPrompt, agents.OrchestratorTools, "noAction", nil, image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image)
 				if err != nil {
 					log.Println(err)
 				}