// Haystack/backend/agents/location_agent.go

package agents

import (
	"context"
	"encoding/json"
	"fmt"
	"screenmark/screenmark/.gen/haystack/haystack/model"
	"screenmark/screenmark/agents/client"
	"screenmark/screenmark/models"

	"github.com/charmbracelet/log"
	"github.com/google/uuid"
)

// locationPrompt is the system prompt for the location agent; NewLocationAgent
// passes it as SystemPrompt. It tells the model to extract locations from the
// input, compare them against listLocations, then call either createLocation
// or createExistingLocation, optionally reply, and finish with stopAgent.
//
// NOTE(review): the prompt names the listing tool both "listLocations" and
// "listLocation", and asks the model to call it "with the extracted InputName
// and/or InputAddress", while the listLocations schema in locationTools
// declares no parameters. The text is runtime data, so it is left unchanged
// here — confirm whether the wording should be aligned with the schema.
const locationPrompt = `
Role: Location AI Assistant

Objective: Identify locations from images/text, manage a saved list, and answer user queries about saved locations using the provided tools.
The user does not want to have duplicate entries on their saved location list. So you should only create a new location if listLocation doesnt return
what would be a duplicate.

Core Logic:

**Extract Location Details:** Attempt to extract location details (like InputName, InputAddress) from the user's input.
	* If no details can be extracted, inform the user and use stopAgent.

**Check for Existing Location:** If details *were* extracted:
    * Use listLocations with the extracted InputName and/or InputAddress to search for potentially matching locations already saved in the list.

Action loop:
**Thinking**
	* Use the think tool to analytise the image.
		* You should think about whether listLocations already contains this location, or if it is a new location.
	* You should always call this after listLocations.
	* You must think about whether or not listLocations already has this location.

**Decide Action based on Search Results:**
	* If no existing location looks like the location on the input. You should use createLocation.
		* Do not use this tool if this location already exists.
	* If the input contains a location that already exists, you should use createExistingLocation.
		* If there is a similar location in listLocation, you should use this tool. It doesnt have to be an exact match.
	* Lastly, if the user asked a specific question about a location. You must do all the actions but also always use the reply tool to answer the user.
		* This is the only way you can communicate with the user if they asked a query.

You should repeat the action loop until all locations on the image are done.
Once you are done, use stopAgent.
`

// replyTool is the JSON schema fragment for the optional "reply" tool. It is
// not a complete JSON document: getLocationAgentTools substitutes it for the
// %s placeholder inside the locationTools array, which is why the fragment
// ends with a trailing comma (another array element follows the placeholder).
const replyTool = `
{
        "type": "function",
        "function": {
            "name": "reply",
            "description": "Signals intent to provide information about a specific known location in response to a user's query. Use only if the user asked a question and the location's ID was found via listLocations.",
            "parameters": {
                "type": "object",
                "properties": {
                    "locationId": {
                        "type": "string",
                        "description": "The UUID of the saved location that the user is asking about."
                    }
                },
                "required": ["locationId"]
            }
        }
},`

const locationTools = `
[
    {
        "type": "function",
        "function": {
            "name": "think",
            "description": "Use this tool to think through the image, evaluating the location and whether or not it exists in the users listLocations. You should also ask yourself if the user has asked a query, and if you've used the correct tool to reply to them.",
            "parameters": {
                "type": "object",
                "properties": {
					"thought": {
						"type": "string",
						"description": "A singular thought about the image"
					}
				},
                "required": ["thought"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "listLocations",
            "description": "Retrieves the list of the user's currently saved locations (names, addresses, IDs). Use this first to check if a location from an image already exists, or to find the ID of a location the user is asking about.",
            "parameters": {
                "type": "object",
                "properties": {},
                "required": []
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "createLocation",
            "description": "Creates a new location with as much information as you can extract. Be precise. You should only add the parameters you can actually see on the image.",
            "parameters": {
                "type": "object",
                "properties": {
                    "name": {
                        "type": "string",
                        "description": "The primary name of the location"
                    },
                    "address": {
                        "type": "string",
                        "description": "The address of the location"
                    }
                },
                "required": ["name"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "createExistingLocation",
            "description": "Called when a location already exists in the users list, from listLocations. Only call this to indicate this image contains a duplicate.",
            "parameters": {
                "type": "object",
                "properties": {
                    "locationId": {
                        "type": "string",
                        "description": "The UUID of the location, from listLocations"
                    }
                },
                "required": ["locationId"]
            }
        }
    },
	%s
	{
		"type": "function",
		"function": {
		  "name": "stopAgent",
		  "description": "Use this tool to signal that the contact processing for the current image is complete.",
		  "parameters": {
			"type": "object",
			"properties": {},
			"required": []
		  }
		}
	  }
]`

func getLocationAgentTools(allowReply bool) string {
	if allowReply {
		return fmt.Sprintf(locationTools, replyTool)
	} else {
		return fmt.Sprintf(locationTools, "")
	}
}

// Argument payloads for the location agent's tools, decoded from the JSON
// argument string supplied with each tool call.
type (
	// listLocationArguments is the (empty) argument set of the listLocations tool.
	listLocationArguments struct{}

	// createLocationArguments carries the createLocation tool arguments.
	// Address is a pointer so that an absent address stays nil.
	createLocationArguments struct {
		Name    string  `json:"name"`
		Address *string `json:"address"`
	}

	// createExistingLocationArguments carries the createExistingLocation tool
	// arguments; LocationID holds the textual ID, which the handler parses as
	// a UUID.
	createExistingLocationArguments struct {
		LocationID string `json:"locationId"`
	}
)

// NewLocationAgentWithComm builds the same agent as NewLocationAgent and then
// replaces its tool schema with the variant that also advertises the "reply"
// tool.
func NewLocationAgentWithComm(log *log.Logger, locationModel models.LocationModel) client.AgentClient {
	// Named "agent" rather than "client" so the client package stays visible.
	agent := NewLocationAgent(log, locationModel)
	agent.Options.JsonTools = getLocationAgentTools(true)
	return agent
}

// NewLocationAgent builds the location agent client: it configures the system
// prompt, the tool schema without the "reply" tool, the logger and "stopAgent"
// as the end tool call, then registers a handler for each tool.
//
// Registered handlers:
//   - listLocations: returns locationModel.List for the calling user.
//   - createLocation: saves a new location and links it to the current image.
//   - createExistingLocation: links an already saved location to the image.
//   - reply, think: acknowledge the call with "ok" and do nothing else.
//
// The reply handler is registered here even though this constructor's schema
// does not advertise the tool; NewLocationAgentWithComm swaps in the schema
// that does.
func NewLocationAgent(log *log.Logger, locationModel models.LocationModel) client.AgentClient {
	// acknowledge serves tools whose invocation needs no server-side work.
	acknowledge := func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
		return "ok", nil
	}

	agent := client.CreateAgentClient(client.CreateAgentClientOptions{
		SystemPrompt: locationPrompt,
		JsonTools:    getLocationAgentTools(false),
		Log:          log,
		EndToolCall:  "stopAgent",
	})

	agent.ToolHandler.AddTool("listLocations", func(info client.ToolHandlerInfo, args string, call client.ToolCall) (any, error) {
		return locationModel.List(context.Background(), info.UserId)
	})

	agent.ToolHandler.AddTool("createLocation", func(info client.ToolHandlerInfo, rawArgs string, call client.ToolCall) (any, error) {
		var params createLocationArguments
		if err := json.Unmarshal([]byte(rawArgs), &params); err != nil {
			return model.Locations{}, err
		}

		ctx := context.Background()

		// TODO: this tool could be simpler if the model offered a single call
		// that saves the location and links it to the image together.
		saved, err := locationModel.Save(ctx, info.UserId, model.Locations{
			Name:    params.Name,
			Address: params.Address,
		})
		if err != nil {
			return model.Locations{}, err
		}

		if _, err := locationModel.SaveToImage(ctx, info.ImageId, saved.ID); err != nil {
			return model.Locations{}, err
		}

		return saved, nil
	})

	agent.ToolHandler.AddTool("createExistingLocation", func(info client.ToolHandlerInfo, rawArgs string, call client.ToolCall) (any, error) {
		var params createExistingLocationArguments
		if err := json.Unmarshal([]byte(rawArgs), &params); err != nil {
			return "", err
		}

		// The ID arrives as text and must be a valid UUID before it is linked.
		existingID, err := uuid.Parse(params.LocationID)
		if err != nil {
			return "", err
		}

		if _, err := locationModel.SaveToImage(context.Background(), info.ImageId, existingID); err != nil {
			return "", err
		}

		return "", nil
	})

	agent.ToolHandler.AddTool("reply", acknowledge)
	agent.ToolHandler.AddTool("think", acknowledge)

	return agent
}