feat(prompts): adding better prompts & restoring tool_stop

Mistral's models seem to do something really strange if `tool_choice` is
set to anything but `any`: they start putting the tool call inside the
`content` field instead of emitting an actual tool call. This means that
I need this `stop` mechanism implemented as a tool call, because I cannot
trust the model to stop by itself.

I quite like this model though, it's cheap, it's fast and it's open
source. And all the answers are pretty good!
This commit is contained in:
2025-04-17 15:24:21 +01:00
parent 4b0ef8b17f
commit 8e73ad6f4e
6 changed files with 252 additions and 153 deletions

View File

@ -194,7 +194,7 @@ func (chat *Chat) AddImage(imageName string, image []byte, query *string) error
contentLength := 1
if query != nil {
contentLength = 2
contentLength += 1
}
messageContent := ArrayMessage{

View File

@ -160,7 +160,8 @@ func (client AgentClient) Request(req *AgentRequestBody) (AgentResponse, error)
return AgentResponse{}, errors.New("Unsupported. We currently only accept 1 choice from AI.")
}
req.Chat.AddAiResponse(agentResponse.Choices[0].Message)
msg := agentResponse.Choices[0].Message
req.Chat.AddAiResponse(msg)
return agentResponse, nil
}
@ -178,7 +179,13 @@ func (client *AgentClient) ToolLoop(info ToolHandlerInfo, req *AgentRequestBody)
}
err = client.Process(info, req)
if err != nil {
if err == FinishedCall {
client.Log.Debug("Agent is finished")
}
return err
}
}
@ -230,7 +237,7 @@ func (client *AgentClient) RunAgent(userId uuid.UUID, imageId uuid.UUID, imageNa
panic(err)
}
toolChoice := "auto"
toolChoice := "any"
request := AgentRequestBody{
Tools: &tools,

View File

@ -12,70 +12,112 @@ import (
)
const contactPrompt = `
You are an agent that performs actions on contacts and people you find on an image.
**Role:** You are an AI assistant specialized in processing contact information from images. Your primary function is to use the provided tools (listContacts, createContact, stopAgent) to manage contacts based on image analysis and signal when processing is complete.
You can use tools to achieve your task.
**Primary Goal:** To accurately identify potential contacts in an image, check against existing contacts using the provided tools, create new contact entries when necessary (meticulously avoiding duplicates), and explicitly stop processing when finished or if no action is needed.
You should use listContacts to make sure that you don't create duplicate contacts.
**Input:** You will be given an image that may contain contact information.
Call createContact when you see there is a new contact on this image.
**Output Behavior (CRITICAL):**
* **If providing a text response:** Generate only the conversational text intended for the user in the response content. (Note: This should generally not happen in this workflow, as actions are handled by tools).
* **If using a tool:** Generate **only** the structured tool call request in the designated tool call section of the response. **Do NOT include the tool call JSON, parameters, or any description of your intention to call the tool within the main text/content response.** Your output must be strictly one or the other for a given turn: either text content OR a tool call structure.
Call finish if you dont think theres anything else to do.
**Core Workflow:**
1. **Image Analysis:**
* Carefully scan the provided image to identify and extract any visible contact details (Name, Phone Number, Email Address, Physical Address). Extract *all* available information for each potential contact.
* **If NO potential contact information is found in the image, proceed directly to Step 5 (call stopAgent).**
2. **Duplicate Check (Mandatory First Step if contacts found):**
* If potential contact(s) were found in Step 1, you **must** call the listContacts tool first. **Generate only the listContacts tool call structure.**
* Once you receive the list, compare the extracted information against the existing contacts to determine if each identified person is already present.
* **If *all* identified potential contacts already exist in the list, proceed directly to Step 5 (call stopAgent).**
3. **Create New Contact (Conditional):**
* For each potential contact identified in Step 1 that your check in Step 2 confirms is *new*:
* Call the createContact tool with *all* corresponding extracted information (name, phoneNumber, address, email). name is mandatory. **Generate only the createContact tool call structure.**
* Process *one new contact creation per turn*. If multiple new contacts need creating, you will call createContact sequentially (one call per turn).
4. **Handling Multiple Contacts:**
* The workflow intrinsically handles multiple contacts by requiring a listContacts check first, followed by potential sequential createContact calls for each new individual found.
5. **Task Completion / No Action Needed:**
* Call the stopAgent tool **only** when one of the following conditions is met:
* No potential contact information was found in the initial image analysis (Step 1).
* The listContacts check confirmed that *all* potential contacts identified in the image already exist (Step 2).
* You have successfully processed all identified contacts (i.e., performed the listContacts check and called createContact for *all* new individuals found).
* **Generate only the stopAgent tool call structure.**
**Available Tools:**
* **listContacts**: Retrieves the existing contact list. **Must** be called first if potential contacts are found in the image, to enable duplicate checking.
* **createContact**: Adds a *new*, non-duplicate contact. Only call *after* listContacts confirms the person is new. name is mandatory.
* **stopAgent**: Signals that processing for the current image is complete (either action was taken, no action was needed, or all identified contacts already existed). Call this as the final step or when no other action is applicable based on the workflow.
**Key Instructions & Constraints:**
* **Strict Output Separation:** **Never mix text content and tool call structures in the same response content. Generate ONLY the tool call when using a tool.**
* **Tool Order:** listContacts (if contacts found) -> [createContact (if new contacts exist, potentially multiple calls)] -> stopAgent. OR stopAgent directly if no contacts found or all contacts exist.
* **Accuracy & Completeness:** Strive for accurate extraction and provide all available details when calling createContact.
* **No Duplicates:** The core purpose of the listContacts check is to prevent duplicate entries.
* **Use Provided Tools Only:** Only use listContacts, createContact, and stopAgent.
`
const contactTools = `
[
{
"type": "function",
"function": {
"name": "listContacts",
"description": "List the users existing contacts",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
},
{
"type": "function",
"function": {
"name": "createContact",
"description": "Creates a new contact",
"parameters": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "the name of the person"
},
"phoneNumber": {
"type": "string"
},
"address": {
"type": "string",
"description": "their physical address"
},
"email": {
"type": "string"
}
},
"required": ["name"]
}
}
},
{
"type": "function",
"function": {
"name": "finish",
"description": "Call when you dont think theres anything to do",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
{
"type": "function",
"function": {
"name": "listContacts",
"description": "Retrieves the complete list of the user's currently saved contacts (e.g., names, phone numbers, emails if available in the stored data). This tool is essential and **must** be called *before* attempting to create a new contact if potential contact info is found in the image, to check if the person already exists and prevent duplicate entries.",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
},
{
"type": "function",
"function": {
"name": "createContact",
"description": "Saves a new contact to the user's contact list. Only use this function **after** confirming the contact does not already exist by checking the output of listContacts. Provide all available extracted information for the new contact. Process one new contact per call.",
"parameters": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The full name of the person being added as a contact. This field is mandatory."
},
"phoneNumber": {
"type": "string",
"description": "The contact's primary phone number, including area or country code if available. Provide this if extracted from the image."
},
"address": {
"type": "string",
"description": "The complete physical mailing address of the contact (e.g., street number, street name, city, state/province, postal code, country). Provide this if extracted from the image."
},
"email": {
"type": "string",
"description": "The contact's primary email address. Provide this if extracted from the image."
}
},
"required": ["name"]
}
}
},
{
"type": "function",
"function": {
"name": "stopAgent",
"description": "Use this tool to signal that the contact processing for the current image is complete. Call this *only* when: 1) No contact info was found initially, OR 2) All found contacts were confirmed to already exist after calling listContacts, OR 3) All necessary createContact calls for new individuals have been completed.",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
}
]
`
@ -95,7 +137,7 @@ func NewContactAgent(log *log.Logger, contactModel models.ContactModel) client.A
SystemPrompt: contactPrompt,
JsonTools: contactTools,
Log: log,
EndToolCall: "finish",
EndToolCall: "stopAgent",
})
agentClient.ToolHandler.AddTool("listContacts", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {

View File

@ -13,24 +13,51 @@ import (
)
const eventPrompt = `
You are an agent.
**Role:** You are an Event Processing AI Assistant specialized in extracting event information from images, managing event data using provided tools, and ensuring accuracy and avoiding duplicates.
The user will send you images and you have to identify if they have any events or a place.
This could be a friend suggesting to meet, a conference, or anything that looks like an event.
**Primary Goal:** To analyze images, identify potential events (like meetings, appointments, conferences, invitations), extract key details (name, date/time, location description), check against existing events, retrieve location identifiers if applicable, create new event entries when necessary, and signal completion using the stopAgent tool.
There are various tools you can use to perform this task.
**Input:** You will be given an image that may contain information about an event, including details like name, date, time, and location.
listEvents
Lists the users already existing events.
**Core Workflow:**
createEvent
Use this to create a new events.
1. **Image Analysis & Detail Extraction:**
* Carefully scan the image to identify potential event information.
* Extract key details: Event Name/Title, Start Date/Time, End Date/Time (if specified), and any Location Description mentioned.
* **If NO identifiable event information (Name and/or Date/Time) is found, proceed directly to Step 6 (call stopAgent).** A location alone is not sufficient to trigger event creation.
getEventLocationId
Use this if the image contains a location or place. This tool will return the locationId.
2. **Duplicate Check (Mandatory if Event Found):**
* If potential event details were found, you **must** call the listEvents tool first to check for duplicates. **Generate only the listEvents tool call structure.**
* Once you receive the list, compare the extracted event details (Name, Start Date/Time primarily) against the existing events.
* **If a matching event already exists, proceed directly to Step 6 (call stopAgent).**
finish
Call when there is nothing else to do.
3. **Location ID Retrieval (Conditional):**
* If the event is identified as *new* (Step 2) AND a *location description* was extracted (Step 1):
* Call the getEventLocationId tool, providing the extracted location description. **Generate only the getEventLocationId tool call structure.**
* Await the result containing the locationId. If the tool fails or doesn't return a locationId, proceed to Step 4 but omit the locationId.
4. **Create New Event (Conditional):**
* If the event was identified as *new* (Step 2):
* Prepare the parameters for the createEvent tool using the extracted details (Name, Start Date/Time, End Date/Time).
* If a locationId was successfully retrieved in Step 3, include it.
* Call the createEvent tool. **Generate only the createEvent tool call structure.**
5. **Handling Multiple Events:**
* If the image contains multiple distinct events, ideally process them one by one following steps 1-4 (or 1-2 then 6) for each before finishing. (Current structure implies one event per image interaction leading to finish).
6. **Task Completion / No Action Needed:**
* Call the stopAgent tool **only** when one of the following conditions is met:
* No identifiable event information was found in the initial image analysis (Step 1).
* The listEvents check confirmed the identified event already exists (Step 2).
* You have successfully called createEvent for a new event (Step 4).
* **Generate only the stopAgent tool call structure.**
**Available Tools:**
* **listEvents**: Retrieves the user's existing events. **Must** be called first if potential event details are found in the image, to enable duplicate checking.
* **getEventLocationId**: Takes a location description (text) and retrieves a unique ID (locationId) for it. Use this *before* createEvent *only* if a new event has a specific location mentioned.
* **createEvent**: Adds a *new*, non-duplicate event to the user's calendar/list. Only call *after* listEvents confirms the event is new. Requires name. Include startDateTime, endDateTime, and locationId (if available and retrieved).
* **stopAgent**: Signals that processing for the current image is complete (either action was taken, no action was needed because the event already existed, or no event was found). Call this as the final step.
`
const eventTools = `
@ -39,7 +66,7 @@ const eventTools = `
"type": "function",
"function": {
"name": "listEvents",
"description": "List the events the user already has.",
"description": "Retrieves the list of the user's currently scheduled events. Essential for checking if an event identified in the image already exists to prevent duplicates. Must be called before potentially creating an event.",
"parameters": {
"type": "object",
"properties": {},
@ -47,52 +74,57 @@ const eventTools = `
}
}
},
{
{
"type": "function",
"function": {
"name": "createEvent",
"description": "Use to create a new events",
"description": "Creates a new event in the user's calendar or list. Use only after listEvents confirms the event is new. Provide all extracted details.",
"parameters": {
"type": "object",
"properties": {
"name": {
"type": "string"
"type": "string",
"description": "The name or title of the event. This field is mandatory."
},
"startDateTime": {
"type": "string",
"description": "The start time as an ISO string"
"description": "The event's start date and time in ISO 8601 format (e.g., '2025-04-18T10:00:00Z' or '2025-04-18T11:00:00+01:00'). Include if available."
},
"endDateTime": {
"type": "string",
"description": "The end time as an ISO string"
"description": "The event's end date and time in ISO 8601 format. Optional, include if available and different from startDateTime."
},
"locationId": {
"type": "string",
"description": "The UUID of this location. You should use getEventLocationId to get this information, but only if you believe the event contains a location"
}
},
"locationId": {
"type": "string",
"description": "The unique identifier (UUID or similar) for the event's location. Only include this if a location was specified for the event AND its ID was successfully retrieved using the getEventLocationId tool."
}
},
"required": ["name"]
}
}
},
{
{
"type": "function",
"function": {
"name": "getEventLocationId",
"description": "Get the ID of the location on the image, only use if the event contains a location or place.",
"description": "Retrieves a unique identifier for a location description associated with an event. Use this before createEvent if a new event specifies a location.",
"parameters": {
"type": "object",
"properties": {},
"required": []
"properties": {
"locationDescription": {
"type": "string",
"description": "The text describing the location extracted from the image (e.g., 'Conference Room B', '123 Main St, Anytown', 'Zoom Link details')."
}
},
"required": ["locationDescription"]
}
}
},
{
{
"type": "function",
"function": {
"name": "finish",
"description": "Call this when there is nothing left to do.",
"name": "stopAgent",
"description": "Call this tool only when event processing for the current image is fully complete. This occurs if: 1) No event info was found, OR 2) The found event already exists, OR 3) A new event has been successfully created.",
"parameters": {
"type": "object",
"properties": {},
@ -118,7 +150,7 @@ func NewEventAgent(log *log.Logger, eventsModel models.EventModel, locationModel
SystemPrompt: eventPrompt,
JsonTools: eventTools,
Log: log,
EndToolCall: "finish",
EndToolCall: "stopAgent",
})
locationAgent := NewLocationAgent(log.WithPrefix("Events 📅 > Locations 📍"), locationModel)

View File

@ -12,24 +12,37 @@ import (
)
const locationPrompt = `
You are an agent.
Role: Location AI Assistant
The user will send you images and you have to identify if they have any location or a place.
This could a picture of a real place, an address, or it's name.
Objective: Identify locations from images/text, manage a saved list, and answer user queries about saved locations using the provided tools.
There are various tools you can use to perform this task.
Core Logic:
listLocations
Lists the users already existing locations.
1. **Analyze Input:** Look for location details (Name, Address) in the image and check for any user query about a location.
createLocation
Use this to create a new location, when you don't see a matching one from listLocations call.
2. **Handle User Query First:**
* If the user asks about a *specific* location:
* Use listLocations to find its locationId.
* If found, use reply with the locationId.
* If not found, prepare summary (no matching location).
* *(Proceed to step 4)*
reply
Use this only if the user has asked a question about a location.
3. **Handle Image Location (if no query was handled):**
* If location details were found in the image:
* Use listLocations to check if it's already saved.
* If *new*, use createLocation (Name is required).
* If *duplicate*, prepare summary (location already exists).
* *(Proceed to step 4)*
finish
Call when there is nothing else to do.
4. **Summarize & Stop:** Always finish by writing a message explaining what you did (e.g., called reply, called createLocation, found a duplicate, couldn't find a match) or if no location information was found. After providing the summary message, call stopAgent to signal the end of processing for this turn.
Tool Usage:
* listLocations: Check saved locations (for queries or before saving).
* createLocation: Save a *new* location (requires Name).
* reply: Answer a query about a *known*, *saved* location using its locationId.
* stopAgent: Signals the end of the agent's processing for the current turn. Call this *after* providing the summary message.
* **Constraint:** Typically, only one main action tool (listLocations, createLocation, or reply) will be called per turn before summarizing and stopping. listLocations might precede createLocation or reply within the logic.
`
const locationTools = `
@ -38,7 +51,7 @@ const locationTools = `
"type": "function",
"function": {
"name": "listLocations",
"description": "List the locations the user already has.",
"description": "Retrieves the list of the user's currently saved locations (names, addresses, IDs). Use this first to check if a location from an image already exists, or to find the ID of a location the user is asking about.",
"parameters": {
"type": "object",
"properties": {},
@ -46,53 +59,56 @@ const locationTools = `
}
}
},
{
{
"type": "function",
"function": {
"name": "createLocation",
"description": "Use to create a new location",
"description": "Creates a new location entry in the user's saved list. Use only after listLocations confirms the location does not already exist.",
"parameters": {
"type": "object",
"properties": {
"name": {
"type": "string"
"type": "string",
"description": "The primary name of the location (e.g., 'Eiffel Tower', 'Mom's House', 'Acme Corp HQ'). This field is mandatory."
},
"address": {
"type": "string"
"type": "string",
"description": "The full street address of the location, if available (e.g., 'Champ de Mars, 5 Av. Anatole France, 75007 Paris, France'). Include if extracted."
}
},
},
"required": ["name"]
}
}
},
{
{
"type": "function",
"function": {
"name": "reply",
"description": "Reply to a user query, only if the user has asked something",
"description": "Signals intent to provide information about a specific known location in response to a user's query. Use only if the user asked a question and the location's ID was found via listLocations.",
"parameters": {
"type": "object",
"properties": {
"locationId": {
"type": "string"
}
},
"locationId": {
"type": "string",
"description": "The unique identifier of the saved location that the user is asking about."
}
},
"required": ["locationId"]
}
}
},
{
"type": "function",
"function": {
"name": "finish",
"description": "Call this when there is nothing left to do.",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
}
"type": "function",
"function": {
"name": "stopAgent",
"description": "Use this tool to signal that the location processing for the current image is complete.",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
}
]`
type listLocationArguments struct{}
@ -109,7 +125,7 @@ func NewLocationAgent(log *log.Logger, locationModel models.LocationModel) clien
SystemPrompt: locationPrompt,
JsonTools: locationTools,
Log: log,
EndToolCall: "finish",
EndToolCall: "stopAgent",
})
agentClient.ToolHandler.AddTool("listLocations", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {

View File

@ -9,7 +9,7 @@ import (
const orchestratorPrompt = `
**Role:** You are an Orchestrator AI responsible for analyzing images provided by the user.
**Primary Task:** Examine the input image and determine which specialized AI agent(s), available as tool calls, should be invoked to process the relevant information within the image. Your goal is to extract and structure useful information for the user by selecting the most appropriate tool(s).
**Primary Task:** Examine the input image and determine which specialized AI agent(s), available as tool calls, should be invoked to process the relevant information within the image, or if no specialized processing is needed. Your goal is to either extract and structure useful information for the user by selecting the most appropriate tool(s) or explicitly indicate that no specific action is required.
**Input:** User-provided image.
@ -20,18 +20,13 @@ const orchestratorPrompt = `
* Information about a person or contact details
* Information about a place, location, or address
* Information about an event
* Content that doesn't fit any specific category or lacks actionable information.
2. **Agent Selection - Determine ALL that apply:**
* **contactAgent:** Is there information specifically related to a person or their contact details (e.g., business card, name/email/phone)? If YES, select contactAgent.
* **locationAgent:** Is there information specifically identifying a place, location, or address (e.g., map, street sign, address text)? If YES, select locationAgent.
* **eventAgent:** Is there information specifically related to an event (e.g., invitation, poster with date/time, schedule)? If YES, select eventAgent.
* **noteAgent** Does the image contain *any* text/writing (including code, formulas)?
* If YES, *and* if contactAgent, locationAgent, or eventAgent were *also* selected, consider if noteAgent captures *additional* textual information not covered by the others. Call noteAgent alongside the others *only if* there is significant extra text.
* If YES, and *none* of the other agents (contact, location, event) were selected, then select noteAgent.
3. **Final Tool Choice:**
* If *at least one* of noteAgent, contactAgent, locationAgent, or eventAgent was selected in Step 2, prepare to call *all* selected agents in parallel.
* If *none* of those four agents were selected after your analysis, you must stop.
**Available Agents (Tools):**
@ -39,13 +34,11 @@ const orchestratorPrompt = `
* **contactAgent**: Use when the image contains some person or contact.
* **locationAgent**: Use when the image contains some place, location or address.
* **eventAgent**: Use when the image contains some event.
* **noAgent**: Call this when you are done working on this image.
**Execution Rules:**
* Call all applicable agents (noteAgent, contactAgent, locationAgent, eventAgent) simultaneously (in parallel).
* If and only if none of the other agents apply, stop.
**Output:** Specify the tool call(s) required based on your final choice.
* Call all applicable specialized agents (noteAgent, contactAgent, locationAgent, eventAgent) simultaneously (in parallel).
`
const orchestratorTools = `
@ -97,8 +90,21 @@ const orchestratorTools = `
"required": []
}
}
},
{
"type": "function",
"function": {
"name": "noAgent",
"description": "Call this when none of the specialized agents (noteAgent, contactAgent, locationAgent, eventAgent) apply to the image, or when you are done working on this image.",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
}
]`
]
`
type OrchestratorAgent struct {
Client client.AgentClient
@ -115,39 +121,35 @@ func NewOrchestratorAgent(log *log.Logger, noteAgent NoteAgent, contactAgent cli
SystemPrompt: orchestratorPrompt,
JsonTools: orchestratorTools,
Log: log,
EndToolCall: "noAction",
EndToolCall: "noAgent",
})
agent.ToolHandler.AddTool("noteAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
go noteAgent.GetNotes(info.UserId, info.ImageId, imageName, imageData)
// go noteAgent.GetNotes(info.UserId, info.ImageId, imageName, imageData)
return Status{
Ok: true,
}, nil
return "noteAgent called successfully", nil
})
agent.ToolHandler.AddTool("contactAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
go contactAgent.RunAgent(info.UserId, info.ImageId, imageName, imageData)
return Status{
Ok: true,
}, nil
return "contactAgent called successfully", nil
})
agent.ToolHandler.AddTool("locationAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
go locationAgent.RunAgent(info.UserId, info.ImageId, imageName, imageData)
return Status{
Ok: true,
}, nil
return "locationAgent called successfully", nil
})
agent.ToolHandler.AddTool("eventAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
go eventAgent.RunAgent(info.UserId, info.ImageId, imageName, imageData)
return Status{
Ok: true,
}, nil
return "eventAgent called successfully", nil
})
agent.ToolHandler.AddTool("noAgent", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
return "ok", nil
})
return agent