Mistral's models seem to do something really strange if you allow `tool_choice` to be anything but `any`: they start putting the tool call inside the `content` field instead of emitting an actual tool call. This means I need an explicit `stop` mechanism implemented as a tool call, because I cannot trust the model to stop by itself. I quite like this model though: it's cheap, it's fast, and it's open source. And all the answers are pretty good!
231 lines
10 KiB
Go
231 lines
10 KiB
Go
package agents
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"screenmark/screenmark/.gen/haystack/haystack/model"
|
|
"screenmark/screenmark/agents/client"
|
|
"screenmark/screenmark/models"
|
|
"time"
|
|
|
|
"github.com/charmbracelet/log"
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// eventPrompt is the system prompt for the event agent. It describes the
// workflow the model must follow (analyse the image, check for duplicates,
// resolve a location ID, create the event) and which tool to call at each step.
//
// The completion tool is referred to as stopAgent throughout so that the prompt
// agrees with the tool declared in eventTools and with the EndToolCall name
// configured in NewEventAgent; the prompt previously told the model to call a
// "finish" tool that is not declared anywhere.
const eventPrompt = `
**Role:** You are an Event Processing AI Assistant specialized in extracting event information from images, managing event data using provided tools, and ensuring accuracy and avoiding duplicates.

**Primary Goal:** To analyze images, identify potential events (like meetings, appointments, conferences, invitations), extract key details (name, date/time, location description), check against existing events, retrieve location identifiers if applicable, create new event entries when necessary, and signal completion using the 'stopAgent' tool.

**Input:** You will be given an image that may contain information about an event, including details like name, date, time, and location.

**Core Workflow:**

1. **Image Analysis & Detail Extraction:**
    * Carefully scan the image to identify potential event information.
    * Extract key details: Event Name/Title, Start Date/Time, End Date/Time (if specified), and any Location Description mentioned.
    * **If NO identifiable event information (Name and/or Date/Time) is found, proceed directly to Step 6 (call stopAgent).** A location alone is not sufficient to trigger event creation.

2. **Duplicate Check (Mandatory if Event Found):**
    * If potential event details were found, you **must** call the listEvents tool first to check for duplicates. **Generate only the listEvents tool call structure.**
    * Once you receive the list, compare the extracted event details (Name, Start Date/Time primarily) against the existing events.
    * **If a matching event already exists, proceed directly to Step 6 (call stopAgent).**

3. **Location ID Retrieval (Conditional):**
    * If the event is identified as *new* (Step 2) AND a *location description* was extracted (Step 1):
        * Call the getEventLocationId tool, providing the extracted location description. **Generate only the getEventLocationId tool call structure.**
        * Await the result containing the locationId. If the tool fails or doesn't return a locationId, proceed to Step 4 but omit the locationId.

4. **Create New Event (Conditional):**
    * If the event was identified as *new* (Step 2):
        * Prepare the parameters for the createEvent tool using the extracted details (Name, Start Date/Time, End Date/Time).
        * If a locationId was successfully retrieved in Step 3, include it.
        * Call the createEvent tool. **Generate only the createEvent tool call structure.**

5. **Handling Multiple Events:**
    * If the image contains multiple distinct events, ideally process them one by one following steps 1-4 (or 1-2 then 6) for each before finishing. (Current structure implies one event per image interaction leading to stopAgent).

6. **Task Completion / No Action Needed:**
    * Call the stopAgent tool **only** when one of the following conditions is met:
        * No identifiable event information was found in the initial image analysis (Step 1).
        * The listEvents check confirmed the identified event already exists (Step 2).
        * You have successfully called createEvent for a new event (Step 4).
    * **Generate only the stopAgent tool call structure.**

**Available Tools:**

* **listEvents**: Retrieves the user's existing events. **Must** be called first if potential event details are found in the image, to enable duplicate checking.
* **getEventLocationId**: Takes a location description (text) and retrieves a unique ID (locationId) for it. Use this *before* createEvent *only* if a new event has a specific location mentioned.
* **createEvent**: Adds a *new*, non-duplicate event to the user's calendar/list. Only call *after* listEvents confirms the event is new. Requires name. Include startDateTime, endDateTime, and locationId (if available and retrieved).
* **stopAgent**: Signals that processing for the current image is complete (either action was taken, no action was needed because the event already existed, or no event was found). Call this as the final step.
`
|
|
|
|
// eventTools is the JSON array of function-type tool definitions offered to the
// model: listEvents, createEvent, getEventLocationId and stopAgent. Each entry
// carries the tool name, a description and a JSON-schema "parameters" object.
const eventTools = `
[
	{
		"type": "function",
		"function": {
			"name": "listEvents",
			"description": "Retrieves the list of the user's currently scheduled events. Essential for checking if an event identified in the image already exists to prevent duplicates. Must be called before potentially creating an event.",
			"parameters": {
				"type": "object",
				"properties": {},
				"required": []
			}
		}
	},
	{
		"type": "function",
		"function": {
			"name": "createEvent",
			"description": "Creates a new event in the user's calendar or list. Use only after listEvents confirms the event is new. Provide all extracted details.",
			"parameters": {
				"type": "object",
				"properties": {
					"name": {
						"type": "string",
						"description": "The name or title of the event. This field is mandatory."
					},
					"startDateTime": {
						"type": "string",
						"description": "The event's start date and time in ISO 8601 format (e.g., '2025-04-18T10:00:00Z' or '2025-04-18T11:00:00+01:00'). Include if available."
					},
					"endDateTime": {
						"type": "string",
						"description": "The event's end date and time in ISO 8601 format. Optional, include if available and different from startDateTime."
					},
					"locationId": {
						"type": "string",
						"description": "The unique identifier (UUID or similar) for the event's location. Only include this if a location was specified for the event AND its ID was successfully retrieved using the getEventLocationId tool."
					}
				},
				"required": ["name"]
			}
		}
	},
	{
		"type": "function",
		"function": {
			"name": "getEventLocationId",
			"description": "Retrieves a unique identifier for a location description associated with an event. Use this before createEvent if a new event specifies a location.",
			"parameters": {
				"type": "object",
				"properties": {
					"locationDescription": {
						"type": "string",
						"description": "The text describing the location extracted from the image (e.g., 'Conference Room B', '123 Main St, Anytown', 'Zoom Link details')."
					}
				},
				"required": ["locationDescription"]
			}
		}
	},
	{
		"type": "function",
		"function": {
			"name": "stopAgent",
			"description": "Call this tool only when event processing for the current image is fully complete. This occurs if: 1) No event info was found, OR 2) The found event already exists, OR 3) A new event has been successfully created.",
			"parameters": {
				"type": "object",
				"properties": {},
				"required": []
			}
		}
	}
]`
|
|
|
|
// listEventArguments is the (empty) argument payload of the listEvents tool,
// whose schema declares no properties.
type listEventArguments struct{}
|
|
// createEventArguments is the JSON payload decoded for a createEvent tool call.
//
// Only Name is a plain string; the remaining fields are pointers and stay nil
// when the model leaves the corresponding key out.
//
// NOTE(review): the createEvent schema in eventTools declares "locationId" but
// not "organizerName", so this struct and the schema disagree on the optional
// fields — confirm which side is intended.
type createEventArguments struct {
	// Name is the event title.
	Name string `json:"name"`
	// StartDateTime is the start timestamp as text, if supplied.
	StartDateTime *string `json:"startDateTime"`
	// EndDateTime is the end timestamp as text, if supplied.
	EndDateTime *string `json:"endDateTime"`
	// OrganizerName is the organizer's name, if supplied.
	OrganizerName *string `json:"organizerName"`
}
|
|
// linkEventArguments is the JSON payload decoded for a linkEvent tool call.
type linkEventArguments struct {
	// EventID is the identifier of the event to link, read from the "eventId" key.
	EventID string `json:"eventId"`
}
|
|
|
|
func NewEventAgent(log *log.Logger, eventsModel models.EventModel, locationModel models.LocationModel) client.AgentClient {
|
|
agentClient := client.CreateAgentClient(client.CreateAgentClientOptions{
|
|
SystemPrompt: eventPrompt,
|
|
JsonTools: eventTools,
|
|
Log: log,
|
|
EndToolCall: "stopAgent",
|
|
})
|
|
|
|
locationAgent := NewLocationAgent(log.WithPrefix("Events 📅 > Locations 📍"), locationModel)
|
|
locationQuery := "Can you get me the ID of the location present in this image?"
|
|
locationAgent.Options.Query = &locationQuery
|
|
|
|
agentClient.ToolHandler.AddTool("listEvents", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
|
|
return eventsModel.List(context.Background(), info.UserId)
|
|
})
|
|
|
|
agentClient.ToolHandler.AddTool("createEvent", func(info client.ToolHandlerInfo, _args string, call client.ToolCall) (any, error) {
|
|
args := createEventArguments{}
|
|
err := json.Unmarshal([]byte(_args), &args)
|
|
if err != nil {
|
|
return model.Events{}, err
|
|
}
|
|
|
|
ctx := context.Background()
|
|
|
|
layout := "2006-01-02T15:04:05Z"
|
|
|
|
startTime, err := time.Parse(layout, *args.StartDateTime)
|
|
if err != nil {
|
|
return model.Events{}, err
|
|
}
|
|
|
|
endTime, err := time.Parse(layout, *args.EndDateTime)
|
|
if err != nil {
|
|
return model.Events{}, err
|
|
}
|
|
|
|
events, err := eventsModel.Save(ctx, info.UserId, model.Events{
|
|
Name: args.Name,
|
|
StartDateTime: &startTime,
|
|
EndDateTime: &endTime,
|
|
})
|
|
|
|
if err != nil {
|
|
return model.Events{}, err
|
|
}
|
|
|
|
_, err = eventsModel.SaveToImage(ctx, info.ImageId, events.ID)
|
|
if err != nil {
|
|
return model.Events{}, err
|
|
}
|
|
|
|
return events, nil
|
|
})
|
|
|
|
agentClient.ToolHandler.AddTool("linkEvent", func(info client.ToolHandlerInfo, _args string, call client.ToolCall) (any, error) {
|
|
args := linkEventArguments{}
|
|
err := json.Unmarshal([]byte(_args), &args)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
ctx := context.Background()
|
|
|
|
contactUuid, err := uuid.Parse(args.EventID)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
eventsModel.SaveToImage(ctx, info.ImageId, contactUuid)
|
|
return "Saved", nil
|
|
})
|
|
|
|
agentClient.ToolHandler.AddTool("getEventLocationId", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
|
|
// TODO: reenable this when I'm creating the agent locally instead of getting it from above.
|
|
locationAgent.RunAgent(info.UserId, info.ImageId, info.ImageName, *info.Image)
|
|
|
|
log.Debugf("Reply from location %s\n", locationAgent.Reply)
|
|
return locationAgent.Reply, nil
|
|
})
|
|
|
|
return agentClient
|
|
}
|