From 7b6bdf2c7bcca86d8c8da363df28c0e407a0ed4f Mon Sep 17 00:00:00 2001 From: John Costa Date: Tue, 15 Apr 2025 16:43:27 +0100 Subject: [PATCH 1/9] feat: Adding text message to describe an action3 --- backend/agents/client/chat.go | 47 +++++++++++++++++++++++++++------ backend/agents/client/client.go | 5 ++-- backend/agents/client/tools.go | 3 +++ 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/backend/agents/client/chat.go b/backend/agents/client/chat.go index 6628e56..c6dff19 100644 --- a/backend/agents/client/chat.go +++ b/backend/agents/client/chat.go @@ -65,8 +65,8 @@ func (m ChatUserMessage) MarshalJSON() ([]byte, error) { }) case ArrayMessage: return json.Marshal(&struct { - Role UserRole `json:"role"` - Content []ImageMessageContent `json:"content"` + Role UserRole `json:"role"` + Content []MessageContentMessage `json:"content"` }{ Role: User, Content: t.Content, @@ -121,18 +121,35 @@ func (m SingleMessage) IsSingleMessage() bool { } type ArrayMessage struct { - Content []ImageMessageContent `json:"content"` + Content []MessageContentMessage `json:"content"` } func (m ArrayMessage) IsSingleMessage() bool { return false } +type MessageContentMessage interface { + IsImageMessage() bool +} + +type TextMessageContent struct { + TextType string `json:"type"` + Text string `json:"text"` +} + +func (m TextMessageContent) IsImageMessage() bool { + return false +} + type ImageMessageContent struct { ImageType string `json:"type"` ImageUrl string `json:"image_url"` } +func (m ImageMessageContent) IsImageMessage() bool { + return true +} + type ImageContentUrl struct { Url string `json:"url"` } @@ -165,7 +182,7 @@ func (chat *Chat) AddSystem(prompt string) { }) } -func (chat *Chat) AddImage(imageName string, image []byte) error { +func (chat *Chat) AddImage(imageName string, image []byte, query *string) error { extension := filepath.Ext(imageName) if len(extension) == 0 { // TODO: could also validate for image types we support. @@ -173,14 +190,28 @@ func (chat *Chat) AddImage(imageName string, image []byte) error { } extension = extension[1:] - encodedString := base64.StdEncoding.EncodeToString(image) - messageContent := ArrayMessage{ - Content: make([]ImageMessageContent, 1), + contentLength := 1 + if query != nil { + contentLength = 2 } - messageContent.Content[0] = ImageMessageContent{ + messageContent := ArrayMessage{ + Content: make([]MessageContentMessage, contentLength), + } + + index := 0 + + if query != nil { + messageContent.Content[index] = TextMessageContent{ + TextType: "text", + Text: *query, + } + index += 1 + } + + messageContent.Content[index] = ImageMessageContent{ ImageType: "image_url", ImageUrl: fmt.Sprintf("data:image/%s;base64,%s", extension, encodedString), } diff --git a/backend/agents/client/client.go b/backend/agents/client/client.go index 244d539..850d358 100644 --- a/backend/agents/client/client.go +++ b/backend/agents/client/client.go @@ -220,7 +220,7 @@ func (client AgentClient) Process(info ToolHandlerInfo, req *AgentRequestBody) e return err } -func (client AgentClient) RunAgent(systemPrompt string, jsonTools string, endToolCall string, userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error { +func (client AgentClient) RunAgent(systemPrompt string, jsonTools string, endToolCall string, query *string, userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error { var tools any err := json.Unmarshal([]byte(jsonTools), &tools) @@ -241,7 +241,7 @@ func (client AgentClient) RunAgent(systemPrompt string, jsonTools string, endToo } request.Chat.AddSystem(systemPrompt) - request.Chat.AddImage(imageName, imageData) + request.Chat.AddImage(imageName, imageData, query) _, err = client.Request(&request) if err != nil { @@ -251,6 +251,7 @@ func (client AgentClient) RunAgent(systemPrompt string, jsonTools string, endToo toolHandlerInfo := ToolHandlerInfo{ ImageId: imageId, UserId: userId, + Image: &imageData, } return client.ToolLoop(toolHandlerInfo, &request) diff --git a/backend/agents/client/tools.go b/backend/agents/client/tools.go index 8dc9244..8061311 100644 --- a/backend/agents/client/tools.go +++ b/backend/agents/client/tools.go @@ -10,6 +10,9 @@ import ( type ToolHandlerInfo struct { UserId uuid.UUID ImageId uuid.UUID + + // Pointer because we don't want to copy this around too much. + Image *[]byte } type ToolHandler struct { -- 2.47.2 From 7be669e49e1ab69aafbcb4e474506a056a3e6731 Mon Sep 17 00:00:00 2001 From: John Costa Date: Tue, 15 Apr 2025 16:44:00 +0100 Subject: [PATCH 2/9] wip(agents): allowing event agent to call location agent --- backend/agents/event_agent.go | 14 +++++++++++--- backend/agents/note_agent.go | 2 +- backend/agents/orchestrator.go | 6 +++--- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/backend/agents/event_agent.go b/backend/agents/event_agent.go index b170cea..ae38a68 100644 --- a/backend/agents/event_agent.go +++ b/backend/agents/event_agent.go @@ -87,6 +87,8 @@ type EventAgent struct { client client.AgentClient eventsModel models.EventModel + + locationAgent LocationAgent } type listEventArguments struct{} @@ -100,7 +102,7 @@ type linkEventArguments struct { EventID string `json:"eventId"` } -func NewEventAgent(eventsModel models.EventModel) (EventAgent, error) { +func NewEventAgent(eventsModel models.EventModel, locationAgent LocationAgent) (EventAgent, error) { agentClient, err := client.CreateAgentClient(log.NewWithOptions(os.Stdout, log.Options{ ReportTimestamp: true, TimeFormat: time.Kitchen, @@ -112,8 +114,9 @@ func NewEventAgent(eventsModel models.EventModel) (EventAgent, error) { } agent := EventAgent{ - client: agentClient, - eventsModel: eventsModel, + client: agentClient, + eventsModel: eventsModel, + locationAgent: locationAgent, } agentClient.ToolHandler.AddTool("listEvents", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { @@ -177,5 +180,10 @@ func NewEventAgent(eventsModel models.EventModel) (EventAgent, error) { return "Saved", nil }) + agentClient.ToolHandler.AddTool("getLocationId", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { + // locationAgent.client.RunAgent() + return "no location found", nil + }) + return agent, nil } diff --git a/backend/agents/note_agent.go b/backend/agents/note_agent.go index 3802eb0..9a69f28 100644 --- a/backend/agents/note_agent.go +++ b/backend/agents/note_agent.go @@ -43,7 +43,7 @@ func (agent NoteAgent) GetNotes(userId uuid.UUID, imageId uuid.UUID, imageName s } request.Chat.AddSystem(noteAgentPrompt) - request.Chat.AddImage(imageName, imageData) + request.Chat.AddImage(imageName, imageData, nil) resp, err := agent.client.Request(&request) if err != nil { diff --git a/backend/agents/orchestrator.go b/backend/agents/orchestrator.go index 3cdfcc5..3778411 100644 --- a/backend/agents/orchestrator.go +++ b/backend/agents/orchestrator.go @@ -134,7 +134,7 @@ func NewOrchestratorAgent(noteAgent NoteAgent, contactAgent ContactAgent, locati }) agent.ToolHandler.AddTool("contactAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - go contactAgent.client.RunAgent(contactPrompt, contactTools, "finish", info.UserId, info.ImageId, imageName, imageData) + go contactAgent.client.RunAgent(contactPrompt, contactTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) return Status{ Ok: true, @@ -142,7 +142,7 @@ func NewOrchestratorAgent(noteAgent NoteAgent, contactAgent ContactAgent, locati }) agent.ToolHandler.AddTool("locationAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - go locationAgent.client.RunAgent(locationPrompt, locationTools, "finish", info.UserId, info.ImageId, imageName, imageData) + go locationAgent.client.RunAgent(locationPrompt, locationTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) return Status{ Ok: true, @@ -150,7 +150,7 @@ func NewOrchestratorAgent(noteAgent NoteAgent, contactAgent ContactAgent, locati }) agent.ToolHandler.AddTool("eventAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - go eventAgent.client.RunAgent(eventPrompt, eventTools, "finish", info.UserId, info.ImageId, imageName, imageData) + go eventAgent.client.RunAgent(eventPrompt, eventTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) return Status{ Ok: true, -- 2.47.2 From fa127c23318bbc84d88b96eb173a27e1b48374b7 Mon Sep 17 00:00:00 2001 From: John Costa Date: Wed, 16 Apr 2025 14:43:07 +0100 Subject: [PATCH 3/9] feat: event agent calling location agent about location ID This is pretty nice. We can now have agents spawn other agents and actually get super cool functionality from it. The pattern might be a little fragile. --- backend/agents/client/client.go | 15 +++++++++++---- backend/agents/client/tools.go | 5 +++-- backend/agents/event_agent.go | 23 ++++++++++++++++++++--- backend/agents/location_agent.go | 24 ++++++++++++++++++++++++ backend/events.go | 4 ++-- 5 files changed, 60 insertions(+), 11 deletions(-) diff --git a/backend/agents/client/client.go b/backend/agents/client/client.go index 850d358..c08b65d 100644 --- a/backend/agents/client/client.go +++ b/backend/agents/client/client.go @@ -73,6 +73,8 @@ type AgentClient struct { Log *log.Logger + Reply string + Do func(req *http.Request) (*http.Response, error) } @@ -186,7 +188,7 @@ func (client AgentClient) ToolLoop(info ToolHandlerInfo, req *AgentRequestBody) var FinishedCall = errors.New("Last tool tool was called") -func (client AgentClient) Process(info ToolHandlerInfo, req *AgentRequestBody) error { +func (client *AgentClient) Process(info ToolHandlerInfo, req *AgentRequestBody) error { var err error message, err := req.Chat.GetLatest() @@ -211,6 +213,10 @@ func (client AgentClient) Process(info ToolHandlerInfo, req *AgentRequestBody) e toolResponse := client.ToolHandler.Handle(info, toolCall) + if toolCall.Function.Name == "reply" { + client.Reply = toolCall.Function.Arguments + } + client.Log.SetLevel(log.DebugLevel) client.Log.Debugf("Response: %s", toolResponse.Content) @@ -249,9 +255,10 @@ func (client AgentClient) RunAgent(systemPrompt string, jsonTools string, endToo } toolHandlerInfo := ToolHandlerInfo{ - ImageId: imageId, - UserId: userId, - Image: &imageData, + ImageId: imageId, + ImageName: imageName, + UserId: userId, + Image: &imageData, } return client.ToolLoop(toolHandlerInfo, &request) diff --git a/backend/agents/client/tools.go b/backend/agents/client/tools.go index 8061311..d58c12d 100644 --- a/backend/agents/client/tools.go +++ b/backend/agents/client/tools.go @@ -8,8 +8,9 @@ import ( ) type ToolHandlerInfo struct { - UserId uuid.UUID - ImageId uuid.UUID + UserId uuid.UUID + ImageId uuid.UUID + ImageName string // Pointer because we don't want to copy this around too much. Image *[]byte diff --git a/backend/agents/event_agent.go b/backend/agents/event_agent.go index ae38a68..0e1c6f9 100644 --- a/backend/agents/event_agent.go +++ b/backend/agents/event_agent.go @@ -27,6 +27,9 @@ Lists the users already existing events. createEvent Use this to create a new events. +getEventLocationId +Use this if the image contains a location or place. This tool will return the locationId. + finish Call when there is nothing else to do. ` @@ -68,6 +71,18 @@ const eventTools = ` "required": ["name"] } } + }, + { + "type": "function", + "function": { + "name": "getEventLocationId", + "description": "Get the ID of the location on the image, only use if the event contains a location or place.", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } }, { "type": "function", @@ -180,9 +195,11 @@ func NewEventAgent(eventsModel models.EventModel, locationAgent LocationAgent) ( return "Saved", nil }) - agentClient.ToolHandler.AddTool("getLocationId", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - // locationAgent.client.RunAgent() - return "no location found", nil + agentClient.ToolHandler.AddTool("getEventLocationId", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { + query := "Can you get me the ID of the location present in this image?" + locationAgent.client.RunAgent(locationPrompt, locationTools, "finish", &query, info.UserId, info.ImageId, info.ImageName, *info.Image) + + return locationAgent.client.Reply, nil }) return agent, nil diff --git a/backend/agents/location_agent.go b/backend/agents/location_agent.go index 792f1e9..24f8d41 100644 --- a/backend/agents/location_agent.go +++ b/backend/agents/location_agent.go @@ -27,6 +27,9 @@ Lists the users already existing locations. createLocation Use this to create a new location, when you don't see a matching one from listLocations call. +reply +Use this only if the user has asked a question about a location. + finish Call when there is nothing else to do. ` @@ -63,6 +66,22 @@ const locationTools = ` "required": ["name"] } } + }, + { + "type": "function", + "function": { + "name": "reply", + "description": "Reply to a user query, only if the user has asked something", + "parameters": { + "type": "object", + "properties": { + "locationId": { + "type": "string" + } + }, + "required": ["locationId"] + } + } }, { "type": "function", @@ -157,5 +176,10 @@ func NewLocationAgent(locationModel models.LocationModel) (LocationAgent, error) return "Saved", nil }) + agentClient.ToolHandler.AddTool("reply", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { + agent.client.Log.Debug(args) + return "ok", nil + }) + return agent, nil } diff --git a/backend/events.go b/backend/events.go index 9f7aead..2c4dba5 100644 --- a/backend/events.go +++ b/backend/events.go @@ -57,7 +57,7 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { panic(err) } - eventAgent, err := agents.NewEventAgent(eventModel) + eventAgent, err := agents.NewEventAgent(eventModel, locationAgent) if err != nil { panic(err) } @@ -82,7 +82,7 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { // Still need to find some way to hide this complexity away. // I don't think wrapping agents in structs actually works too well. - err = orchestrator.Client.RunAgent(agents.OrchestratorPrompt, agents.OrchestratorTools, "noAction", image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image) + err = orchestrator.Client.RunAgent(agents.OrchestratorPrompt, agents.OrchestratorTools, "noAction", nil, image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image) if err != nil { log.Println(err) } -- 2.47.2 From 1651926c4d1856ab5f2fa7d6ef5fdb21de4777db Mon Sep 17 00:00:00 2001 From: John Costa Date: Thu, 17 Apr 2025 10:36:11 +0100 Subject: [PATCH 4/9] refactor(agents): no need to wrap them in another struct --- backend/agents/contact_agent.go | 33 ++++++----------------- backend/agents/event_agent.go | 39 +++++++-------------------- backend/agents/location_agent.go | 35 +++++++------------------ backend/agents/note_agent.go | 10 ++----- backend/agents/orchestrator.go | 22 +++++----------- backend/events.go | 45 +++++++++++++++++++++----------- 6 files changed, 66 insertions(+), 118 deletions(-) diff --git a/backend/agents/contact_agent.go b/backend/agents/contact_agent.go index 7dd45ce..eacbb28 100644 --- a/backend/agents/contact_agent.go +++ b/backend/agents/contact_agent.go @@ -3,11 +3,9 @@ package agents import ( "context" "encoding/json" - "os" "screenmark/screenmark/.gen/haystack/haystack/model" "screenmark/screenmark/agents/client" "screenmark/screenmark/models" - "time" "github.com/charmbracelet/log" "github.com/google/uuid" @@ -81,12 +79,6 @@ const contactTools = ` ] ` -type ContactAgent struct { - client client.AgentClient - - contactModel models.ContactModel -} - type listContactsArguments struct{} type createContactsArguments struct { Name string `json:"name"` @@ -98,23 +90,14 @@ type linkContactArguments struct { ContactID string `json:"contactId"` } -func NewContactAgent(contactModel models.ContactModel) (ContactAgent, error) { - agentClient, err := client.CreateAgentClient(log.NewWithOptions(os.Stdout, log.Options{ - ReportTimestamp: true, - TimeFormat: time.Kitchen, - Prefix: "Contacts 👥", - })) +func NewContactAgent(log *log.Logger, contactModel models.ContactModel) (client.AgentClient, error) { + agentClient, err := client.CreateAgentClient(log) if err != nil { - return ContactAgent{}, err - } - - agent := ContactAgent{ - client: agentClient, - contactModel: contactModel, + return client.AgentClient{}, err } agentClient.ToolHandler.AddTool("listContacts", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - return agent.contactModel.List(context.Background(), info.UserId) + return contactModel.List(context.Background(), info.UserId) }) agentClient.ToolHandler.AddTool("createContact", func(info client.ToolHandlerInfo, _args string, call client.ToolCall) (any, error) { @@ -126,7 +109,7 @@ func NewContactAgent(contactModel models.ContactModel) (ContactAgent, error) { ctx := context.Background() - contact, err := agent.contactModel.Save(ctx, info.UserId, model.Contacts{ + contact, err := contactModel.Save(ctx, info.UserId, model.Contacts{ Name: args.Name, PhoneNumber: args.PhoneNumber, Email: args.Email, @@ -136,7 +119,7 @@ func NewContactAgent(contactModel models.ContactModel) (ContactAgent, error) { return model.Contacts{}, err } - _, err = agent.contactModel.SaveToImage(ctx, info.ImageId, contact.ID) + _, err = contactModel.SaveToImage(ctx, info.ImageId, contact.ID) if err != nil { return model.Contacts{}, err } @@ -158,7 +141,7 @@ func NewContactAgent(contactModel models.ContactModel) (ContactAgent, error) { return "", err } - _, err = agent.contactModel.SaveToImage(ctx, info.ImageId, contactUuid) + _, err = contactModel.SaveToImage(ctx, info.ImageId, contactUuid) if err != nil { return "", err } @@ -166,5 +149,5 @@ func NewContactAgent(contactModel models.ContactModel) (ContactAgent, error) { return "Saved", nil }) - return agent, nil + return agentClient, nil } diff --git a/backend/agents/event_agent.go b/backend/agents/event_agent.go index 0e1c6f9..609f1f0 100644 --- a/backend/agents/event_agent.go +++ b/backend/agents/event_agent.go @@ -3,7 +3,6 @@ package agents import ( "context" "encoding/json" - "os" "screenmark/screenmark/.gen/haystack/haystack/model" "screenmark/screenmark/agents/client" "screenmark/screenmark/models" @@ -98,14 +97,6 @@ const eventTools = ` } ]` -type EventAgent struct { - client client.AgentClient - - eventsModel models.EventModel - - locationAgent LocationAgent -} - type listEventArguments struct{} type createEventArguments struct { Name string `json:"name"` @@ -117,25 +108,15 @@ type linkEventArguments struct { EventID string `json:"eventId"` } -func NewEventAgent(eventsModel models.EventModel, locationAgent LocationAgent) (EventAgent, error) { - agentClient, err := client.CreateAgentClient(log.NewWithOptions(os.Stdout, log.Options{ - ReportTimestamp: true, - TimeFormat: time.Kitchen, - Prefix: "Events 📍", - })) +func NewEventAgent(log *log.Logger, eventsModel models.EventModel, locationAgent client.AgentClient) (client.AgentClient, error) { + agentClient, err := client.CreateAgentClient(log) if err != nil { - return EventAgent{}, err - } - - agent := EventAgent{ - client: agentClient, - eventsModel: eventsModel, - locationAgent: locationAgent, + return client.AgentClient{}, err } agentClient.ToolHandler.AddTool("listEvents", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - return agent.eventsModel.List(context.Background(), info.UserId) + return eventsModel.List(context.Background(), info.UserId) }) agentClient.ToolHandler.AddTool("createEvent", func(info client.ToolHandlerInfo, _args string, call client.ToolCall) (any, error) { @@ -159,7 +140,7 @@ func NewEventAgent(eventsModel models.EventModel, locationAgent LocationAgent) ( return model.Events{}, err } - events, err := agent.eventsModel.Save(ctx, info.UserId, model.Events{ + events, err := eventsModel.Save(ctx, info.UserId, model.Events{ Name: args.Name, StartDateTime: &startTime, EndDateTime: &endTime, @@ -169,7 +150,7 @@ func NewEventAgent(eventsModel models.EventModel, locationAgent LocationAgent) ( return model.Events{}, err } - _, err = agent.eventsModel.SaveToImage(ctx, info.ImageId, events.ID) + _, err = eventsModel.SaveToImage(ctx, info.ImageId, events.ID) if err != nil { return model.Events{}, err } @@ -191,16 +172,16 @@ func NewEventAgent(eventsModel models.EventModel, locationAgent LocationAgent) ( return "", err } - agent.eventsModel.SaveToImage(ctx, info.ImageId, contactUuid) + eventsModel.SaveToImage(ctx, info.ImageId, contactUuid) return "Saved", nil }) agentClient.ToolHandler.AddTool("getEventLocationId", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { query := "Can you get me the ID of the location present in this image?" - locationAgent.client.RunAgent(locationPrompt, locationTools, "finish", &query, info.UserId, info.ImageId, info.ImageName, *info.Image) + locationAgent.RunAgent(locationPrompt, locationTools, "finish", &query, info.UserId, info.ImageId, info.ImageName, *info.Image) - return locationAgent.client.Reply, nil + return locationAgent.Reply, nil }) - return agent, nil + return agentClient, nil } diff --git a/backend/agents/location_agent.go b/backend/agents/location_agent.go index 24f8d41..0c1b4be 100644 --- a/backend/agents/location_agent.go +++ b/backend/agents/location_agent.go @@ -3,11 +3,9 @@ package agents import ( "context" "encoding/json" - "os" "screenmark/screenmark/.gen/haystack/haystack/model" "screenmark/screenmark/agents/client" "screenmark/screenmark/models" - "time" "github.com/charmbracelet/log" "github.com/google/uuid" @@ -97,12 +95,6 @@ const locationTools = ` } ]` -type LocationAgent struct { - client client.AgentClient - - locationModel models.LocationModel -} - type listLocationArguments struct{} type createLocationArguments struct { Name string `json:"name"` @@ -112,24 +104,15 @@ type linkLocationArguments struct { LocationID string `json:"locationId"` } -func NewLocationAgent(locationModel models.LocationModel) (LocationAgent, error) { - agentClient, err := client.CreateAgentClient(log.NewWithOptions(os.Stdout, log.Options{ - ReportTimestamp: true, - TimeFormat: time.Kitchen, - Prefix: "Locations 📍", - })) +func NewLocationAgent(log *log.Logger, locationModel models.LocationModel) (client.AgentClient, error) { + agentClient, err := client.CreateAgentClient(log) if err != nil { - return LocationAgent{}, err - } - - agent := LocationAgent{ - client: agentClient, - locationModel: locationModel, + return client.AgentClient{}, err } agentClient.ToolHandler.AddTool("listLocations", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - return agent.locationModel.List(context.Background(), info.UserId) + return locationModel.List(context.Background(), info.UserId) }) agentClient.ToolHandler.AddTool("createLocation", func(info client.ToolHandlerInfo, _args string, call client.ToolCall) (any, error) { @@ -141,7 +124,7 @@ func NewLocationAgent(locationModel models.LocationModel) (LocationAgent, error) ctx := context.Background() - location, err := agent.locationModel.Save(ctx, info.UserId, model.Locations{ + location, err := locationModel.Save(ctx, info.UserId, model.Locations{ Name: args.Name, Address: args.Address, }) @@ -150,7 +133,7 @@ func NewLocationAgent(locationModel models.LocationModel) (LocationAgent, error) return model.Locations{}, err } - _, err = agent.locationModel.SaveToImage(ctx, info.ImageId, location.ID) + _, err = locationModel.SaveToImage(ctx, info.ImageId, location.ID) if err != nil { return model.Locations{}, err } @@ -172,14 +155,14 @@ func NewLocationAgent(locationModel models.LocationModel) (LocationAgent, error) return "", err } - agent.locationModel.SaveToImage(ctx, info.ImageId, contactUuid) + locationModel.SaveToImage(ctx, info.ImageId, contactUuid) return "Saved", nil }) agentClient.ToolHandler.AddTool("reply", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - agent.client.Log.Debug(args) + agentClient.Log.Debug(args) return "ok", nil }) - return agent, nil + return agentClient, nil } diff --git a/backend/agents/note_agent.go b/backend/agents/note_agent.go index 9a69f28..81a2d57 100644 --- a/backend/agents/note_agent.go +++ b/backend/agents/note_agent.go @@ -2,11 +2,9 @@ package agents import ( "context" - "os" "screenmark/screenmark/.gen/haystack/haystack/model" "screenmark/screenmark/agents/client" "screenmark/screenmark/models" - "time" "github.com/charmbracelet/log" "github.com/google/uuid" @@ -70,12 +68,8 @@ func (agent NoteAgent) GetNotes(userId uuid.UUID, imageId uuid.UUID, imageName s return nil } -func NewNoteAgent(noteModel models.NoteModel) (NoteAgent, error) { - client, err := client.CreateAgentClient(log.NewWithOptions(os.Stdout, log.Options{ - ReportTimestamp: true, - TimeFormat: time.Kitchen, - Prefix: "Notes 📝", - })) +func NewNoteAgent(log *log.Logger, noteModel models.NoteModel) (NoteAgent, error) { + client, err := client.CreateAgentClient(log) if err != nil { return NoteAgent{}, err } diff --git a/backend/agents/orchestrator.go b/backend/agents/orchestrator.go index 3778411..3b090bb 100644 --- a/backend/agents/orchestrator.go +++ b/backend/agents/orchestrator.go @@ -2,9 +2,7 @@ package agents import ( "errors" - "os" "screenmark/screenmark/agents/client" - "time" "github.com/charmbracelet/log" ) @@ -114,15 +112,11 @@ type Status struct { Ok bool `json:"ok"` } -func NewOrchestratorAgent(noteAgent NoteAgent, contactAgent ContactAgent, locationAgent LocationAgent, eventAgent EventAgent, imageName string, imageData []byte) (OrchestratorAgent, error) { - agent, err := client.CreateAgentClient(log.NewWithOptions(os.Stdout, log.Options{ - ReportTimestamp: true, - TimeFormat: time.Kitchen, - Prefix: "Orchestrator 🎼", - })) +func NewOrchestratorAgent(log *log.Logger, noteAgent NoteAgent, contactAgent client.AgentClient, locationAgent client.AgentClient, eventAgent client.AgentClient, imageName string, imageData []byte) (client.AgentClient, error) { + agent, err := client.CreateAgentClient(log) if err != nil { - return OrchestratorAgent{}, err + return client.AgentClient{}, err } agent.ToolHandler.AddTool("noteAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { @@ -134,7 +128,7 @@ func NewOrchestratorAgent(noteAgent NoteAgent, contactAgent ContactAgent, locati }) agent.ToolHandler.AddTool("contactAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - go contactAgent.client.RunAgent(contactPrompt, contactTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) + go contactAgent.RunAgent(contactPrompt, contactTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) return Status{ Ok: true, @@ -142,7 +136,7 @@ func NewOrchestratorAgent(noteAgent NoteAgent, contactAgent ContactAgent, locati }) agent.ToolHandler.AddTool("locationAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - go locationAgent.client.RunAgent(locationPrompt, locationTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) + go locationAgent.RunAgent(locationPrompt, locationTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) return Status{ Ok: true, @@ -150,7 +144,7 @@ func NewOrchestratorAgent(noteAgent NoteAgent, contactAgent ContactAgent, locati }) agent.ToolHandler.AddTool("eventAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - go eventAgent.client.RunAgent(eventPrompt, eventTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) + go agent.RunAgent(eventPrompt, eventTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) return Status{ Ok: true, @@ -165,7 +159,5 @@ func NewOrchestratorAgent(noteAgent NoteAgent, contactAgent ContactAgent, locati }, errors.New("Finished! Kinda bad return type but...") }) - return OrchestratorAgent{ - Client: agent, - }, nil + return agent, nil } diff --git a/backend/events.go b/backend/events.go index 2c4dba5..b06174d 100644 --- a/backend/events.go +++ b/backend/events.go @@ -4,16 +4,24 @@ import ( "context" "database/sql" "fmt" - "log" "os" "screenmark/screenmark/agents" "screenmark/screenmark/models" "time" + "github.com/charmbracelet/log" "github.com/google/uuid" "github.com/lib/pq" ) +func createLogger(prefix string) *log.Logger { + return log.NewWithOptions(os.Stdout, log.Options{ + ReportTimestamp: true, + TimeFormat: time.Kitchen, + Prefix: prefix, + }) +} + func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { listener := pq.NewListener(os.Getenv("DB_CONNECTION"), time.Second, time.Second, func(event pq.ListenerEventType, err error) { if err != nil { @@ -28,6 +36,8 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { imageModel := models.NewImageModel(db) contactModel := models.NewContactModel(db) + databaseEventLog := createLogger("Database Events 🤖") + err := listener.Listen("new_image") if err != nil { panic(err) @@ -39,55 +49,60 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { imageId := uuid.MustParse(parameters.Extra) eventManager.listeners[parameters.Extra] = make(chan string) + databaseEventLog.Debug("Starting processing image", "ImageID", imageId) + ctx := context.Background() go func() { - noteAgent, err := agents.NewNoteAgent(noteModel) + noteAgent, err := agents.NewNoteAgent(createLogger("Notes 📝"), noteModel) if err != nil { panic(err) } - contactAgent, err := agents.NewContactAgent(contactModel) + contactAgent, err := agents.NewContactAgent(createLogger("Contacts 👥"), contactModel) if err != nil { panic(err) } - locationAgent, err := agents.NewLocationAgent(locationModel) + locationAgent, err := agents.NewLocationAgent(createLogger("Locations 📍"), locationModel) if err != nil { panic(err) } - eventAgent, err := agents.NewEventAgent(eventModel, locationAgent) + eventAgent, err := agents.NewEventAgent(createLogger("Events 📅"), eventModel, locationAgent) if err != nil { panic(err) } image, err := imageModel.GetToProcessWithData(ctx, imageId) if err != nil { - log.Println("Failed to GetToProcessWithData") - log.Println(err) + log.Error("Failed to GetToProcessWithData", "error", err) return } if err := imageModel.StartProcessing(ctx, image.ID); err != nil { - log.Println("Failed to FinishProcessing") - log.Println(err) + log.Error("Failed to FinishProcessing", "error", err) return } - orchestrator, err := agents.NewOrchestratorAgent(noteAgent, contactAgent, locationAgent, eventAgent, image.Image.ImageName, image.Image.Image) + orchestrator, err := agents.NewOrchestratorAgent(createLogger("Orchestrator 🎼"), noteAgent, contactAgent, locationAgent, eventAgent, image.Image.ImageName, image.Image.Image) if err != nil { panic(err) } - // Still need to find some way to hide this complexity away. - // I don't think wrapping agents in structs actually works too well. - err = orchestrator.Client.RunAgent(agents.OrchestratorPrompt, agents.OrchestratorTools, "noAction", nil, image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image) + err = orchestrator.RunAgent(agents.OrchestratorPrompt, agents.OrchestratorTools, "noAction", nil, image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image) if err != nil { - log.Println(err) + log.Error("Orchestrator failed", "error", "err") + return } - imageModel.FinishProcessing(ctx, image.ID) + _, err = imageModel.FinishProcessing(ctx, image.ID) + if err != nil { + log.Error("Failed to finish processing", "ImageID", imageId) + return + } + + databaseEventLog.Debug("Starting processing image", "ImageID", imageId) }() } } -- 2.47.2 From 8fed2f9b9a27ee84de231bccb95155e1f4809871 Mon Sep 17 00:00:00 2001 From: John Costa Date: Thu, 17 Apr 2025 10:48:30 +0100 Subject: [PATCH 5/9] fix: using correct eventAgent instead of orchestrator bug + better logging --- backend/agents/event_agent.go | 1 + backend/agents/orchestrator.go | 2 +- backend/events.go | 13 +++++-------- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/backend/agents/event_agent.go b/backend/agents/event_agent.go index 609f1f0..712690d 100644 --- a/backend/agents/event_agent.go +++ b/backend/agents/event_agent.go @@ -178,6 +178,7 @@ func NewEventAgent(log *log.Logger, eventsModel models.EventModel, locationAgent agentClient.ToolHandler.AddTool("getEventLocationId", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { query := "Can you get me the ID of the location present in this image?" + locationAgent.Log = log.With("Locations 📍", true) locationAgent.RunAgent(locationPrompt, locationTools, "finish", &query, info.UserId, info.ImageId, info.ImageName, *info.Image) return locationAgent.Reply, nil diff --git a/backend/agents/orchestrator.go b/backend/agents/orchestrator.go index 3b090bb..d3e072e 100644 --- a/backend/agents/orchestrator.go +++ b/backend/agents/orchestrator.go @@ -144,7 +144,7 @@ func NewOrchestratorAgent(log *log.Logger, noteAgent NoteAgent, contactAgent cli }) agent.ToolHandler.AddTool("eventAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - go agent.RunAgent(eventPrompt, eventTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) + go eventAgent.RunAgent(eventPrompt, eventTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) return Status{ Ok: true, diff --git a/backend/events.go b/backend/events.go index b06174d..367cdd1 100644 --- a/backend/events.go +++ b/backend/events.go @@ -3,7 +3,6 @@ package main import ( "context" "database/sql" - "fmt" "os" "screenmark/screenmark/agents" "screenmark/screenmark/models" @@ -37,6 +36,7 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { contactModel := models.NewContactModel(db) databaseEventLog := createLogger("Database Events 🤖") + databaseEventLog.SetLevel(log.DebugLevel) err := listener.Listen("new_image") if err != nil { @@ -76,12 +76,12 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { image, err := imageModel.GetToProcessWithData(ctx, imageId) if err != nil { - log.Error("Failed to GetToProcessWithData", "error", err) + databaseEventLog.Error("Failed to GetToProcessWithData", "error", err) return } if err := imageModel.StartProcessing(ctx, image.ID); err != nil { - log.Error("Failed to FinishProcessing", "error", err) + databaseEventLog.Error("Failed to FinishProcessing", "error", err) return } @@ -92,13 +92,13 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { err = orchestrator.RunAgent(agents.OrchestratorPrompt, agents.OrchestratorTools, "noAction", nil, image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image) if err != nil { - log.Error("Orchestrator failed", "error", "err") + databaseEventLog.Error("Orchestrator failed", "error", err) return } _, err = imageModel.FinishProcessing(ctx, image.ID) if err != nil { - log.Error("Failed to finish processing", "ImageID", imageId) + databaseEventLog.Error("Failed to finish processing", "ImageID", imageId) return } @@ -137,9 +137,6 @@ func ListenProcessingImageStatus(db *sql.DB, eventManager *EventManager) { stringUuid := data.Extra[0:36] status := data.Extra[36:] - fmt.Printf("UUID: %s\n", stringUuid) - fmt.Printf("Receiving :s\n", data.Extra) - imageListener, exists := eventManager.listeners[stringUuid] if !exists { continue -- 2.47.2 From c4569e925b0277f30ff0baad5e0fae7322d725c8 Mon Sep 17 00:00:00 2001 From: John Costa Date: Thu, 17 Apr 2025 10:58:19 +0100 Subject: [PATCH 6/9] refactor(agents): encapsulating prompt and calls inside factory method --- backend/agents/client/client.go | 30 +++++++++++++++++++++--------- backend/agents/contact_agent.go | 10 ++++++---- backend/agents/event_agent.go | 18 ++++++++++-------- backend/agents/location_agent.go | 11 ++++++----- backend/agents/note_agent.go | 7 +++---- backend/agents/orchestrator.go | 21 +++++++++++---------- backend/events.go | 2 +- 7 files changed, 58 insertions(+), 41 deletions(-) diff --git a/backend/agents/client/client.go b/backend/agents/client/client.go index c08b65d..29370c1 100644 --- a/backend/agents/client/client.go +++ b/backend/agents/client/client.go @@ -76,15 +76,25 @@ type AgentClient struct { Reply string Do func(req *http.Request) (*http.Response, error) + + Options CreateAgentClientOptions } const OPENAI_API_KEY = "OPENAI_API_KEY" -func CreateAgentClient(log *log.Logger) (AgentClient, error) { +type CreateAgentClientOptions struct { + Log *log.Logger + SystemPrompt string + JsonTools string + EndToolCall string + Query *string +} + +func CreateAgentClient(options CreateAgentClientOptions) AgentClient { apiKey := os.Getenv(OPENAI_API_KEY) if len(apiKey) == 0 { - return AgentClient{}, errors.New(OPENAI_API_KEY + " was not found.") + panic("No api key") } return AgentClient{ @@ -95,12 +105,14 @@ func CreateAgentClient(log *log.Logger) (AgentClient, error) { return client.Do(req) }, - Log: log, + Log: options.Log, ToolHandler: ToolsHandlers{ handlers: map[string]ToolHandler{}, }, - }, nil + + Options: options, + } } func (client AgentClient) getRequest(body []byte) (*http.Request, error) { @@ -226,9 +238,9 @@ func (client *AgentClient) Process(info ToolHandlerInfo, req *AgentRequestBody) return err } -func (client AgentClient) RunAgent(systemPrompt string, jsonTools string, endToolCall string, query *string, userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error { +func (client AgentClient) RunAgent(userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error { var tools any - err := json.Unmarshal([]byte(jsonTools), &tools) + err := json.Unmarshal([]byte(client.Options.JsonTools), &tools) toolChoice := "any" @@ -237,7 +249,7 @@ func (client AgentClient) RunAgent(systemPrompt string, jsonTools string, endToo ToolChoice: &toolChoice, Model: "pixtral-12b-2409", Temperature: 0.3, - EndToolCall: endToolCall, + EndToolCall: client.Options.EndToolCall, ResponseFormat: ResponseFormat{ Type: "text", }, @@ -246,8 +258,8 @@ func (client AgentClient) RunAgent(systemPrompt string, jsonTools string, endToo }, } - request.Chat.AddSystem(systemPrompt) - request.Chat.AddImage(imageName, imageData, query) + request.Chat.AddSystem(client.Options.SystemPrompt) + request.Chat.AddImage(imageName, imageData, client.Options.Query) _, err = client.Request(&request) if err != nil { diff --git a/backend/agents/contact_agent.go b/backend/agents/contact_agent.go index eacbb28..2b105df 100644 --- a/backend/agents/contact_agent.go +++ b/backend/agents/contact_agent.go @@ -91,10 +91,12 @@ type linkContactArguments struct { } func NewContactAgent(log *log.Logger, contactModel models.ContactModel) (client.AgentClient, error) { - agentClient, err := client.CreateAgentClient(log) - if err != nil { - return client.AgentClient{}, err - } + agentClient := client.CreateAgentClient(client.CreateAgentClientOptions{ + SystemPrompt: contactPrompt, + JsonTools: contactTools, + Log: log, + EndToolCall: "finish", + }) agentClient.ToolHandler.AddTool("listContacts", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { return contactModel.List(context.Background(), info.UserId) diff --git a/backend/agents/event_agent.go b/backend/agents/event_agent.go index 712690d..150c77e 100644 --- a/backend/agents/event_agent.go +++ b/backend/agents/event_agent.go @@ -109,11 +109,12 @@ type linkEventArguments struct { } func NewEventAgent(log *log.Logger, eventsModel models.EventModel, locationAgent client.AgentClient) (client.AgentClient, error) { - agentClient, err := client.CreateAgentClient(log) - - if err != nil { - return client.AgentClient{}, err - } + agentClient := client.CreateAgentClient(client.CreateAgentClientOptions{ + SystemPrompt: eventPrompt, + JsonTools: eventTools, + Log: log, + EndToolCall: "finish", + }) agentClient.ToolHandler.AddTool("listEvents", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { return eventsModel.List(context.Background(), info.UserId) @@ -177,9 +178,10 @@ func NewEventAgent(log *log.Logger, eventsModel models.EventModel, locationAgent }) agentClient.ToolHandler.AddTool("getEventLocationId", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - query := "Can you get me the ID of the location present in this image?" - locationAgent.Log = log.With("Locations 📍", true) - locationAgent.RunAgent(locationPrompt, locationTools, "finish", &query, info.UserId, info.ImageId, info.ImageName, *info.Image) + // TODO: reenable this when I'm creating the agent locally instead of getting it from above. + // query := "Can you get me the ID of the location present in this image?" + // locationAgent.Log = log.With("Locations 📍", true) + // locationAgent.RunAgent(info.UserId, info.ImageId, info.ImageName, *info.Image) return locationAgent.Reply, nil }) diff --git a/backend/agents/location_agent.go b/backend/agents/location_agent.go index 0c1b4be..209d58c 100644 --- a/backend/agents/location_agent.go +++ b/backend/agents/location_agent.go @@ -105,11 +105,12 @@ type linkLocationArguments struct { } func NewLocationAgent(log *log.Logger, locationModel models.LocationModel) (client.AgentClient, error) { - agentClient, err := client.CreateAgentClient(log) - - if err != nil { - return client.AgentClient{}, err - } + agentClient := client.CreateAgentClient(client.CreateAgentClientOptions{ + SystemPrompt: locationPrompt, + JsonTools: locationTools, + Log: log, + EndToolCall: "finish", + }) agentClient.ToolHandler.AddTool("listLocations", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { return locationModel.List(context.Background(), info.UserId) diff --git a/backend/agents/note_agent.go b/backend/agents/note_agent.go index 81a2d57..fc26b8a 100644 --- a/backend/agents/note_agent.go +++ b/backend/agents/note_agent.go @@ -69,10 +69,9 @@ func (agent NoteAgent) GetNotes(userId uuid.UUID, imageId uuid.UUID, imageName s } func NewNoteAgent(log *log.Logger, noteModel models.NoteModel) (NoteAgent, error) { - client, err := client.CreateAgentClient(log) - if err != nil { - return NoteAgent{}, err - } + client := client.CreateAgentClient(client.CreateAgentClientOptions{ + SystemPrompt: noteAgentPrompt, + }) agent := NoteAgent{ client: client, diff --git a/backend/agents/orchestrator.go b/backend/agents/orchestrator.go index d3e072e..b5f68e4 100644 --- a/backend/agents/orchestrator.go +++ b/backend/agents/orchestrator.go @@ -7,7 +7,7 @@ import ( "github.com/charmbracelet/log" ) -const OrchestratorPrompt = ` +const orchestratorPrompt = ` You are an Orchestrator for various AI agents. The user will send you images and you have to determine which agents you have to call, in order to best help the user. @@ -38,7 +38,7 @@ Always call agents in parallel if you need to call more than 1. Do not call the agent if you do not think it is relevant for the image. ` -const OrchestratorTools = ` +const orchestratorTools = ` [ { "type": "function", @@ -113,11 +113,12 @@ type Status struct { } func NewOrchestratorAgent(log *log.Logger, noteAgent NoteAgent, contactAgent client.AgentClient, locationAgent client.AgentClient, eventAgent client.AgentClient, imageName string, imageData []byte) (client.AgentClient, error) { - agent, err := client.CreateAgentClient(log) - - if err != nil { - return client.AgentClient{}, err - } + agent := client.CreateAgentClient(client.CreateAgentClientOptions{ + SystemPrompt: orchestratorPrompt, + JsonTools: orchestratorTools, + Log: log, + EndToolCall: "noAction", + }) agent.ToolHandler.AddTool("noteAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { go noteAgent.GetNotes(info.UserId, info.ImageId, imageName, imageData) @@ -128,7 +129,7 @@ func NewOrchestratorAgent(log *log.Logger, noteAgent NoteAgent, contactAgent cli }) agent.ToolHandler.AddTool("contactAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - go contactAgent.RunAgent(contactPrompt, contactTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) + go contactAgent.RunAgent(info.UserId, info.ImageId, imageName, imageData) return Status{ Ok: true, @@ -136,7 +137,7 @@ func NewOrchestratorAgent(log *log.Logger, noteAgent NoteAgent, contactAgent cli }) agent.ToolHandler.AddTool("locationAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - go locationAgent.RunAgent(locationPrompt, locationTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) + go locationAgent.RunAgent(info.UserId, info.ImageId, imageName, imageData) return Status{ Ok: true, @@ -144,7 +145,7 @@ func NewOrchestratorAgent(log *log.Logger, noteAgent NoteAgent, contactAgent cli }) agent.ToolHandler.AddTool("eventAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - go eventAgent.RunAgent(eventPrompt, eventTools, "finish", nil, info.UserId, info.ImageId, imageName, imageData) + go eventAgent.RunAgent(info.UserId, info.ImageId, imageName, imageData) return Status{ Ok: true, diff --git a/backend/events.go b/backend/events.go index 367cdd1..e82bb8c 100644 --- a/backend/events.go +++ b/backend/events.go @@ -90,7 +90,7 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { panic(err) } - err = orchestrator.RunAgent(agents.OrchestratorPrompt, agents.OrchestratorTools, "noAction", nil, image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image) + err = orchestrator.RunAgent(image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image) if err != nil { databaseEventLog.Error("Orchestrator failed", "error", err) return -- 2.47.2 From 1e5028177f5c13b9c6a0e1e387ae893d20b20cfa Mon Sep 17 00:00:00 2001 From: John Costa Date: Thu, 17 Apr 2025 11:02:11 +0100 Subject: [PATCH 7/9] refactor(agents): not returning an error on factory method --- backend/agents/contact_agent.go | 4 ++-- backend/agents/event_agent.go | 12 +++++++----- backend/agents/location_agent.go | 4 ++-- backend/agents/note_agent.go | 4 ++-- backend/agents/orchestrator.go | 4 ++-- backend/events.go | 29 +++++------------------------ 6 files changed, 20 insertions(+), 37 deletions(-) diff --git a/backend/agents/contact_agent.go b/backend/agents/contact_agent.go index 2b105df..0383b8d 100644 --- a/backend/agents/contact_agent.go +++ b/backend/agents/contact_agent.go @@ -90,7 +90,7 @@ type linkContactArguments struct { ContactID string `json:"contactId"` } -func NewContactAgent(log *log.Logger, contactModel models.ContactModel) (client.AgentClient, error) { +func NewContactAgent(log *log.Logger, contactModel models.ContactModel) client.AgentClient { agentClient := client.CreateAgentClient(client.CreateAgentClientOptions{ SystemPrompt: contactPrompt, JsonTools: contactTools, @@ -151,5 +151,5 @@ func NewContactAgent(log *log.Logger, contactModel models.ContactModel) (client. return "Saved", nil }) - return agentClient, nil + return agentClient } diff --git a/backend/agents/event_agent.go b/backend/agents/event_agent.go index 150c77e..2224988 100644 --- a/backend/agents/event_agent.go +++ b/backend/agents/event_agent.go @@ -108,7 +108,7 @@ type linkEventArguments struct { EventID string `json:"eventId"` } -func NewEventAgent(log *log.Logger, eventsModel models.EventModel, locationAgent client.AgentClient) (client.AgentClient, error) { +func NewEventAgent(log *log.Logger, eventsModel models.EventModel, locationModel models.LocationModel) client.AgentClient { agentClient := client.CreateAgentClient(client.CreateAgentClientOptions{ SystemPrompt: eventPrompt, JsonTools: eventTools, @@ -116,6 +116,10 @@ func NewEventAgent(log *log.Logger, eventsModel models.EventModel, locationAgent EndToolCall: "finish", }) + locationAgent := NewLocationAgent(log.With("Locations 📍"), locationModel) + locationQuery := "Can you get me the ID of the location present in this image?" + locationAgent.Options.Query = &locationQuery + agentClient.ToolHandler.AddTool("listEvents", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { return eventsModel.List(context.Background(), info.UserId) }) @@ -179,12 +183,10 @@ func NewEventAgent(log *log.Logger, eventsModel models.EventModel, locationAgent agentClient.ToolHandler.AddTool("getEventLocationId", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { // TODO: reenable this when I'm creating the agent locally instead of getting it from above. - // query := "Can you get me the ID of the location present in this image?" - // locationAgent.Log = log.With("Locations 📍", true) - // locationAgent.RunAgent(info.UserId, info.ImageId, info.ImageName, *info.Image) + locationAgent.RunAgent(info.UserId, info.ImageId, info.ImageName, *info.Image) return locationAgent.Reply, nil }) - return agentClient, nil + return agentClient } diff --git a/backend/agents/location_agent.go b/backend/agents/location_agent.go index 209d58c..ba24e13 100644 --- a/backend/agents/location_agent.go +++ b/backend/agents/location_agent.go @@ -104,7 +104,7 @@ type linkLocationArguments struct { LocationID string `json:"locationId"` } -func NewLocationAgent(log *log.Logger, locationModel models.LocationModel) (client.AgentClient, error) { +func NewLocationAgent(log *log.Logger, locationModel models.LocationModel) client.AgentClient { agentClient := client.CreateAgentClient(client.CreateAgentClientOptions{ SystemPrompt: locationPrompt, JsonTools: locationTools, @@ -165,5 +165,5 @@ func NewLocationAgent(log *log.Logger, locationModel models.LocationModel) (clie return "ok", nil }) - return agentClient, nil + return agentClient } diff --git a/backend/agents/note_agent.go b/backend/agents/note_agent.go index fc26b8a..33b080f 100644 --- a/backend/agents/note_agent.go +++ b/backend/agents/note_agent.go @@ -68,7 +68,7 @@ func (agent NoteAgent) GetNotes(userId uuid.UUID, imageId uuid.UUID, imageName s return nil } -func NewNoteAgent(log *log.Logger, noteModel models.NoteModel) (NoteAgent, error) { +func NewNoteAgent(log *log.Logger, noteModel models.NoteModel) NoteAgent { client := client.CreateAgentClient(client.CreateAgentClientOptions{ SystemPrompt: noteAgentPrompt, }) @@ -78,5 +78,5 @@ func NewNoteAgent(log *log.Logger, noteModel models.NoteModel) (NoteAgent, error noteModel: noteModel, } - return agent, nil + return agent } diff --git a/backend/agents/orchestrator.go b/backend/agents/orchestrator.go index b5f68e4..830c22a 100644 --- a/backend/agents/orchestrator.go +++ b/backend/agents/orchestrator.go @@ -112,7 +112,7 @@ type Status struct { Ok bool `json:"ok"` } -func NewOrchestratorAgent(log *log.Logger, noteAgent NoteAgent, contactAgent client.AgentClient, locationAgent client.AgentClient, eventAgent client.AgentClient, imageName string, imageData []byte) (client.AgentClient, error) { +func NewOrchestratorAgent(log *log.Logger, noteAgent NoteAgent, contactAgent client.AgentClient, locationAgent client.AgentClient, eventAgent client.AgentClient, imageName string, imageData []byte) client.AgentClient { agent := client.CreateAgentClient(client.CreateAgentClientOptions{ SystemPrompt: orchestratorPrompt, JsonTools: orchestratorTools, @@ -160,5 +160,5 @@ func NewOrchestratorAgent(log *log.Logger, noteAgent NoteAgent, contactAgent cli }, errors.New("Finished! Kinda bad return type but...") }) - return agent, nil + return agent } diff --git a/backend/events.go b/backend/events.go index e82bb8c..cfb2c90 100644 --- a/backend/events.go +++ b/backend/events.go @@ -54,25 +54,10 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { ctx := context.Background() go func() { - noteAgent, err := agents.NewNoteAgent(createLogger("Notes 📝"), noteModel) - if err != nil { - panic(err) - } - - contactAgent, err := agents.NewContactAgent(createLogger("Contacts 👥"), contactModel) - if err != nil { - panic(err) - } - - locationAgent, err := agents.NewLocationAgent(createLogger("Locations 📍"), locationModel) - if err != nil { - panic(err) - } - - eventAgent, err := agents.NewEventAgent(createLogger("Events 📅"), eventModel, locationAgent) - if err != nil { - panic(err) - } + noteAgent := agents.NewNoteAgent(createLogger("Notes 📝"), noteModel) + contactAgent := agents.NewContactAgent(createLogger("Contacts 👥"), contactModel) + locationAgent := agents.NewLocationAgent(createLogger("Locations 📍"), locationModel) + eventAgent := agents.NewEventAgent(createLogger("Events 📅"), eventModel, locationModel) image, err := imageModel.GetToProcessWithData(ctx, imageId) if err != nil { @@ -85,11 +70,7 @@ func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { return } - orchestrator, err := agents.NewOrchestratorAgent(createLogger("Orchestrator 🎼"), noteAgent, contactAgent, locationAgent, eventAgent, image.Image.ImageName, image.Image.Image) - if err != nil { - panic(err) - } - + orchestrator := agents.NewOrchestratorAgent(createLogger("Orchestrator 🎼"), noteAgent, contactAgent, locationAgent, eventAgent, image.Image.ImageName, image.Image.Image) err = orchestrator.RunAgent(image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image) if err != nil { databaseEventLog.Error("Orchestrator failed", "error", err) -- 2.47.2 From d1fd2aeaf1c0a7fb5e8b28795f30d6b6f98b9380 Mon Sep 17 00:00:00 2001 From: John Costa Date: Thu, 17 Apr 2025 11:07:37 +0100 Subject: [PATCH 8/9] fix(logger): nil pointer error + log debug level clean --- backend/agents/client/client.go | 3 --- backend/agents/event_agent.go | 2 +- backend/agents/note_agent.go | 1 + backend/events.go | 6 +++++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/backend/agents/client/client.go b/backend/agents/client/client.go index 29370c1..42be680 100644 --- a/backend/agents/client/client.go +++ b/backend/agents/client/client.go @@ -160,8 +160,6 @@ func (client AgentClient) Request(req *AgentRequestBody) (AgentResponse, error) return AgentResponse{}, errors.New("Unsupported. We currently only accept 1 choice from AI.") } - client.Log.SetLevel(log.DebugLevel) - msg := agentResponse.Choices[0].Message if len(msg.Content) > 0 { @@ -229,7 +227,6 @@ func (client *AgentClient) Process(info ToolHandlerInfo, req *AgentRequestBody) client.Reply = toolCall.Function.Arguments } - client.Log.SetLevel(log.DebugLevel) client.Log.Debugf("Response: %s", toolResponse.Content) req.Chat.AddToolResponse(toolResponse) diff --git a/backend/agents/event_agent.go b/backend/agents/event_agent.go index 2224988..e9575a3 100644 --- a/backend/agents/event_agent.go +++ b/backend/agents/event_agent.go @@ -116,7 +116,7 @@ func NewEventAgent(log *log.Logger, eventsModel models.EventModel, locationModel EndToolCall: "finish", }) - locationAgent := NewLocationAgent(log.With("Locations 📍"), locationModel) + locationAgent := NewLocationAgent(log.WithPrefix("Events 📅 > Locations 📍"), locationModel) locationQuery := "Can you get me the ID of the location present in this image?" locationAgent.Options.Query = &locationQuery diff --git a/backend/agents/note_agent.go b/backend/agents/note_agent.go index 33b080f..3b06c59 100644 --- a/backend/agents/note_agent.go +++ b/backend/agents/note_agent.go @@ -71,6 +71,7 @@ func (agent NoteAgent) GetNotes(userId uuid.UUID, imageId uuid.UUID, imageName s func NewNoteAgent(log *log.Logger, noteModel models.NoteModel) NoteAgent { client := client.CreateAgentClient(client.CreateAgentClientOptions{ SystemPrompt: noteAgentPrompt, + Log: log, }) agent := NoteAgent{ diff --git a/backend/events.go b/backend/events.go index cfb2c90..5d4a06e 100644 --- a/backend/events.go +++ b/backend/events.go @@ -14,11 +14,15 @@ import ( ) func createLogger(prefix string) *log.Logger { - return log.NewWithOptions(os.Stdout, log.Options{ + logger := log.NewWithOptions(os.Stdout, log.Options{ ReportTimestamp: true, TimeFormat: time.Kitchen, Prefix: prefix, }) + + logger.SetLevel(log.DebugLevel) + + return logger } func ListenNewImageEvents(db *sql.DB, eventManager *EventManager) { -- 2.47.2 From 8fff043849b5807e50cc62e8880d0e8f2f84222a Mon Sep 17 00:00:00 2001 From: John Costa Date: Thu, 17 Apr 2025 11:15:02 +0100 Subject: [PATCH 9/9] feat(event-location): communicating using tool calls correctly --- backend/agents/client/client.go | 4 ++-- backend/agents/event_agent.go | 8 +++++++- backend/agents/location_agent.go | 1 - 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/backend/agents/client/client.go b/backend/agents/client/client.go index 42be680..4186e54 100644 --- a/backend/agents/client/client.go +++ b/backend/agents/client/client.go @@ -182,7 +182,7 @@ func (client AgentClient) Request(req *AgentRequestBody) (AgentResponse, error) return agentResponse, nil } -func (client AgentClient) ToolLoop(info ToolHandlerInfo, req *AgentRequestBody) error { +func (client *AgentClient) ToolLoop(info ToolHandlerInfo, req *AgentRequestBody) error { for { err := client.Process(info, req) if err != nil { @@ -235,7 +235,7 @@ func (client *AgentClient) Process(info ToolHandlerInfo, req *AgentRequestBody) return err } -func (client AgentClient) RunAgent(userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error { +func (client *AgentClient) RunAgent(userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error { var tools any err := json.Unmarshal([]byte(client.Options.JsonTools), &tools) diff --git a/backend/agents/event_agent.go b/backend/agents/event_agent.go index e9575a3..b2c73a1 100644 --- a/backend/agents/event_agent.go +++ b/backend/agents/event_agent.go @@ -65,7 +65,12 @@ const eventTools = ` "endDateTime": { "type": "string", "description": "The end time as an ISO string" - } + }, + "locationId": { + "type": "string", + "description": "The UUID of this location. You should use getEventLocationId to get this information, but only if you believe the event contains a location" + + } }, "required": ["name"] } @@ -185,6 +190,7 @@ func NewEventAgent(log *log.Logger, eventsModel models.EventModel, locationModel // TODO: reenable this when I'm creating the agent locally instead of getting it from above. locationAgent.RunAgent(info.UserId, info.ImageId, info.ImageName, *info.Image) + log.Debugf("Reply from location %s\n", locationAgent.Reply) return locationAgent.Reply, nil }) diff --git a/backend/agents/location_agent.go b/backend/agents/location_agent.go index ba24e13..0c5f1e4 100644 --- a/backend/agents/location_agent.go +++ b/backend/agents/location_agent.go @@ -161,7 +161,6 @@ func NewLocationAgent(log *log.Logger, locationModel models.LocationModel) clien }) agentClient.ToolHandler.AddTool("reply", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { - agentClient.Log.Debug(args) return "ok", nil }) -- 2.47.2