From b0f4a45c40ec171e6ef095cbc2084c95e2936f02 Mon Sep 17 00:00:00 2001 From: John Costa Date: Fri, 11 Apr 2025 21:12:06 +0100 Subject: [PATCH] feat(contact-agent): working contact agent Built this in under 20 minutes. Getting some really good agents --- backend/agents/contact_agent.go | 175 +++++++++++++++++++++++++ backend/agents/event_location_agent.go | 2 + backend/agents/orchestrator.go | 28 +++- backend/events.go | 7 +- 4 files changed, 210 insertions(+), 2 deletions(-) create mode 100644 backend/agents/contact_agent.go diff --git a/backend/agents/contact_agent.go b/backend/agents/contact_agent.go new file mode 100644 index 0000000..514f4c3 --- /dev/null +++ b/backend/agents/contact_agent.go @@ -0,0 +1,175 @@ +package agents + +import ( + "context" + "encoding/json" + "screenmark/screenmark/.gen/haystack/haystack/model" + "screenmark/screenmark/agents/client" + "screenmark/screenmark/models" + + "github.com/google/uuid" +) + +// contactPrompt is the system prompt written for the contact agent: it names the listContacts, createContact and finish tools and says when to call each. +const contactPrompt = ` +You are an agent that performs actions on contacts and people you find on an image. + +You can use tools to achieve your task. + +You should use listContacts to make sure that you don't create duplicate contacts. + +Call createContact when you see there is a new contact on this image. + +Call finish if you dont think theres anything else to do. 
+` + +// contactTools is the JSON tool schema offered to the model: listContacts (no arguments), createContact (name required; phoneNumber, address and email optional) and finish (no arguments). +const contactTools = ` +[ + { + "type": "function", + "function": { + "name": "listContacts", + "description": "List the users existing contacts", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "createContact", + "description": "Creates a new contact", + "parameters": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "the name of the person" + }, + "phoneNumber": { + "type": "string" + }, + "address": { + "type": "string", + "description": "their physical address" + }, + "email": { + "type": "string" + } + }, + "required": ["name"] + } + } + }, + { + "type": "function", + "function": { + "name": "finish", + "description": "Call when you dont think theres anything to do", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } + } +] +` + +// ContactAgent pairs an agent client with the contact model that its tool handlers read from and write to. +type ContactAgent struct { + client client.AgentClient + + contactModel models.ContactModel +} + +// listContactsArguments is the (empty) argument payload of the listContacts tool. +type listContactsArguments struct{} +// createContactsArguments is the decoded argument payload of the createContact tool; the optional fields are pointers and stay nil when absent from the JSON. +type createContactsArguments struct { + Name string `json:"name"` + PhoneNumber *string `json:"phoneNumber"` + Address *string `json:"address"` + Email *string `json:"email"` +} + +// Yeah this is just a copy of the other one. 
+
+// GetContacts runs the contact agent against a single image.
+//
+// It decodes the contactTools schema, builds a chat request holding the
+// contact system prompt and the image, sends that request once through the
+// agent client, and then calls the client's ToolLoop with the user and image
+// IDs so tool handlers know which user and image they are acting for.
+//
+// It returns the first error produced by decoding the tool schema, by the
+// initial request, or by the tool loop.
+func (agent ContactAgent) GetContacts(userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error {
+	var tools any
+	// contactTools is a constant in this file, so a decode failure means the
+	// literal is malformed; report it rather than sending a request with nil
+	// tools.
+	if err := json.Unmarshal([]byte(contactTools), &tools); err != nil {
+		return err
+	}
+
+	toolChoice := "any"
+
+	request := client.AgentRequestBody{
+		Tools:       &tools,
+		ToolChoice:  &toolChoice,
+		Model:       "pixtral-12b-2409",
+		Temperature: 0.3,
+		EndToolCall: "finish",
+		ResponseFormat: client.ResponseFormat{
+			Type: "text",
+		},
+		Chat: &client.Chat{
+			Messages: make([]client.ChatMessage, 0),
+		},
+	}
+
+	// Must be contactPrompt: it is the prompt that describes the
+	// listContacts / createContact / finish tools declared in contactTools.
+	request.Chat.AddSystem(contactPrompt)
+	request.Chat.AddImage(imageName, imageData)
+
+	// The response itself is not used here; only the error matters.
+	if _, err := agent.client.Request(&request); err != nil {
+		return err
+	}
+
+	toolHandlerInfo := client.ToolHandlerInfo{
+		ImageId: imageId,
+		UserId:  userId,
+	}
+
+	return agent.client.ToolLoop(toolHandlerInfo, &request)
+}
+
+// NewContactAgent creates a ContactAgent around a new agent client and
+// registers the "listContacts" and "createContact" tool handlers on that
+// client. It fails only when the agent client cannot be created.
+//
+// NOTE(review): contactTools also advertises a "finish" tool but no handler is
+// registered for it here — presumably EndToolCall covers it; confirm.
+// NOTE(review): createContact decodes "address" but never stores it — confirm
+// whether model.Contacts has a field for it.
+func NewContactAgent(contactModel models.ContactModel) (ContactAgent, error) {
+	agentClient, err := client.CreateAgentClient()
+	if err != nil {
+		return ContactAgent{}, err
+	}
+
+	agent := ContactAgent{
+		client:       agentClient,
+		contactModel: contactModel,
+	}
+
+	// listContacts: return the calling user's contacts.
+	agentClient.ToolHandler.AddTool("listContacts", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
+		return agent.contactModel.List(context.Background(), info.UserId)
+	})
+
+	// createContact: save the contact for the user, then link it to the image
+	// being processed.
+	agentClient.ToolHandler.AddTool("createContact", func(info client.ToolHandlerInfo, _args string, call client.ToolCall) (any, error) {
+		args := createContactsArguments{}
+		err := json.Unmarshal([]byte(_args), &args)
+		if err != nil {
+			return model.Contacts{}, err
+		}
+
+		ctx := context.Background()
+
+		contact, err := agent.contactModel.Save(ctx, info.UserId, model.Contacts{
+			Name:        args.Name,
+			PhoneNumber: args.PhoneNumber,
+			Email:       args.Email,
+		})
+
+		if err != nil {
+			return model.Contacts{}, err
+		}
+
+		_, err = agent.contactModel.SaveToImage(ctx, info.ImageId, contact.ID)
+		if err != nil {
+			return model.Contacts{}, err
+		}
+
+		return contact, nil
+	})
+
+	return agent, nil
+}
diff --git a/backend/agents/event_location_agent.go b/backend/agents/event_location_agent.go index 22672f6..551eeb4 100644 --- a/backend/agents/event_location_agent.go +++ b/backend/agents/event_location_agent.go @@ -120,6 +120,8 @@ type EventLocationAgent struct { toolHandler client.ToolsHandlers } +// TODO make these private + type ListLocationArguments struct{} type ListOrganizerArguments struct{} diff --git a/backend/agents/orchestrator.go b/backend/agents/orchestrator.go index 430699c..55c769d 100644 --- a/backend/agents/orchestrator.go +++ b/backend/agents/orchestrator.go @@ -28,11 +28,17 @@ noteAgent Use it when there is text on the screen. Any text, always use this. Use me! +contactAgent + +Use it when the image contains information relating a person. + defaultAgent When none of the above apply. Always call agents in parallel if you need to call more than 1. + +Do not call the agent if you do not think it is relevant for the image. ` const MY_TOOLS = ` @@ -60,6 +66,18 @@ const MY_TOOLS = ` "required": [] } } + }, + { + "type": "function", + "function": { + "name": "contactAgent", + "description": "Uses the contact/people agent", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } }, { "type": "function", @@ -128,7 +146,7 @@ func (agent OrchestratorAgent) Orchestrate(userId uuid.UUID, imageId uuid.UUID, return agent.client.ToolLoop(toolHandlerInfo, &request) } -func NewOrchestratorAgent(eventLocationAgent EventLocationAgent, noteAgent NoteAgent, imageName string, imageData []byte) (OrchestratorAgent, error) { +func NewOrchestratorAgent(eventLocationAgent EventLocationAgent, noteAgent NoteAgent, contactAgent ContactAgent, imageName string, imageData []byte) (OrchestratorAgent, error) { agent, err := client.CreateAgentClient() if err != nil { return OrchestratorAgent{}, err @@ -153,6 +171,14 @@ func NewOrchestratorAgent(eventLocationAgent EventLocationAgent, noteAgent NoteA }, nil }) + 
agent.ToolHandler.AddTool("contactAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { + go contactAgent.GetContacts(info.UserId, info.ImageId, imageName, imageData) + + return Status{ + Ok: true, + }, nil + }) + agent.ToolHandler.AddTool("defaultAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) { // To nothing diff --git a/backend/events.go b/backend/events.go index 57d2eb3..c736995 100644 --- a/backend/events.go +++ b/backend/events.go @@ -50,6 +50,11 @@ func ListenNewImageEvents(db *sql.DB) { panic(err) } + contactAgent, err := agents.NewContactAgent(contactModel) + if err != nil { + panic(err) + } + image, err := imageModel.GetToProcessWithData(ctx, imageId) if err != nil { log.Println("Failed to GetToProcessWithData") @@ -64,7 +69,7 @@ func ListenNewImageEvents(db *sql.DB) { return } - orchestrator, err := agents.NewOrchestratorAgent(locationAgent, noteAgent, image.Image.ImageName, image.Image.Image) + orchestrator, err := agents.NewOrchestratorAgent(locationAgent, noteAgent, contactAgent, image.Image.ImageName, image.Image.Image) if err != nil { panic(err) }