diff --git a/backend/agents/client/client.go b/backend/agents/client/client.go index 850d358..c08b65d 100644 --- a/backend/agents/client/client.go +++ b/backend/agents/client/client.go @@ -73,6 +73,8 @@ type AgentClient struct { Log *log.Logger + Reply string + Do func(req *http.Request) (*http.Response, error) } @@ -186,7 +188,7 @@ func (client AgentClient) ToolLoop(info ToolHandlerInfo, req *AgentRequestBody) var FinishedCall = errors.New("Last tool tool was called") -func (client AgentClient) Process(info ToolHandlerInfo, req *AgentRequestBody) error { +func (client *AgentClient) Process(info ToolHandlerInfo, req *AgentRequestBody) error { var err error message, err := req.Chat.GetLatest() @@ -211,6 +213,10 @@ func (client AgentClient) Process(info ToolHandlerInfo, req *AgentRequestBody) e toolResponse := client.ToolHandler.Handle(info, toolCall) + if toolCall.Function.Name == "reply" { + client.Reply = toolCall.Function.Arguments + } + client.Log.SetLevel(log.DebugLevel) client.Log.Debugf("Response: %s", toolResponse.Content) @@ -249,9 +255,10 @@ func (client AgentClient) RunAgent(systemPrompt string, jsonTools string, endToo } toolHandlerInfo := ToolHandlerInfo{ - ImageId: imageId, - UserId: userId, - Image: &imageData, + ImageId: imageId, + ImageName: imageName, + UserId: userId, + Image: &imageData, } return client.ToolLoop(toolHandlerInfo, &request) diff --git a/backend/agents/client/tools.go b/backend/agents/client/tools.go index 8061311..d58c12d 100644 --- a/backend/agents/client/tools.go +++ b/backend/agents/client/tools.go @@ -8,8 +8,9 @@ import ( ) type ToolHandlerInfo struct { - UserId uuid.UUID - ImageId uuid.UUID + UserId uuid.UUID + ImageId uuid.UUID + ImageName string // Pointer because we don't want to copy this around too much. Image *[]byte diff --git a/backend/agents/event_agent.go b/backend/agents/event_agent.go index ae38a68..0e1c6f9 100644 --- a/backend/agents/event_agent.go +++ b/backend/agents/event_agent.go @@ -27,6 +27,9 @@ Lists the users already existing events. createEvent Use this to create a new events. +getEventLocationId +Use this if the image contains a location or place. This tool will return the locationId. + finish Call when there is nothing else to do. ` @@ -68,6 +71,18 @@ const eventTools = ` "required": ["name"] } } + }, + { + "type": "function", + "function": { + "name": "getEventLocationId", + "description": "Get the ID of the location on the image, only use if the event contains a location or place.", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } }, { "type": "function", @@ -180,9 +195,11 @@ func NewEventAgent(eventsModel models.EventModel, locationAgent LocationAgent) ( return "Saved", nil }) - agentClient.ToolHandler.AddTool("getLocationId", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - // locationAgent.client.RunAgent() - return "no location found", nil + agentClient.ToolHandler.AddTool("getEventLocationId", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { + query := "Can you get me the ID of the location present in this image?" + locationAgent.client.RunAgent(locationPrompt, locationTools, "finish", &query, info.UserId, info.ImageId, info.ImageName, *info.Image) + + return locationAgent.client.Reply, nil }) return agent, nil diff --git a/backend/agents/location_agent.go b/backend/agents/location_agent.go index 792f1e9..24f8d41 100644 --- a/backend/agents/location_agent.go +++ b/backend/agents/location_agent.go @@ -27,6 +27,9 @@ Lists the users already existing locations. createLocation Use this to create a new location, when you don't see a matching one from listLocations call. +reply +Use this only if the user has asked a question about a location. + finish Call when there is nothing else to do. ` @@ -63,6 +66,22 @@ const locationTools = ` "required": ["name"] } } + }, + { + "type": "function", + "function": { + "name": "reply", + "description": "Reply to a user query, only if the user has asked something", + "parameters": { + "type": "object", + "properties": { + "locationId": { + "type": "string" + } + }, + "required": ["locationId"] + } + } }, { "type": "function", @@ -157,5 +176,10 @@ func NewLocationAgent(locationModel models.LocationModel) (LocationAgent, error) return "Saved", nil }) + agentClient.ToolHandler.AddTool("reply", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { + agent.client.Log.Debug(args) + return "ok", nil + }) + return agent, nil } diff --git a/backend/events.go b/backend/events.go index 9f7aead..2c4dba5 100644 --- a/backend/events.go +++ b/backend/events.go @@ -57,7 +57,7 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { panic(err) } - eventAgent, err := agents.NewEventAgent(eventModel) + eventAgent, err := agents.NewEventAgent(eventModel, locationAgent) if err != nil { panic(err) } @@ -82,7 +82,7 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { // Still need to find some way to hide this complexity away. // I don't think wrapping agents in structs actually works too well. - err = orchestrator.Client.RunAgent(agents.OrchestratorPrompt, agents.OrchestratorTools, "noAction", image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image) + err = orchestrator.Client.RunAgent(agents.OrchestratorPrompt, agents.OrchestratorTools, "noAction", nil, image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image) if err != nil { log.Println(err) }