Haystack/backend/agents/embeddings_agent.go

package agents

import (
	"context"
	"fmt"
	"screenmark/screenmark/agents/client"
	"screenmark/screenmark/models"

	"github.com/charmbracelet/log"
	"github.com/google/uuid"
)

// embeddingsAgentPropmt is the system prompt sent with every image
// description request. It asks the model for a detailed, content-focused
// description, because that text is what gets turned into embeddings.
//
// NOTE(review): the identifier is misspelled ("Propmt") and the prompt text
// itself contains typos; both are left as-is here because the name is
// referenced elsewhere and the text is runtime model input.
const embeddingsAgentPropmt = `
You are an agent who's job it is to describe the contents of an image.
This description should be detailed as it will be used to create embeddings from the image.
You should focus more on the content of the image, rather than it's appearence.
`

// EmbeddingAgent produces a textual description of an image through a chat
// model, converts that description into an embedding and hands the result to
// the image model for storage.
type EmbeddingAgent struct {
	// embeddings converts the generated description into an embedding.
	embeddings client.EmbeddingsClient

	// client sends the image-description chat request.
	client client.AgentClient

	// imageModel is where the resulting embedding is written.
	imageModel models.ImageModel

	// log is the logger associated with this agent.
	log *log.Logger
}

// GetEmbeddings asks the chat model to describe the given image, converts the
// description into an embedding and stores that embedding for imageId.
//
// imageName and imageData are attached to the chat request as the image to
// describe. userId is accepted for interface compatibility but is not used.
//
// An error is returned when the description request fails, when the response
// contains no choices, when the embeddings request fails, or when storing the
// embedding fails. Underlying errors are wrapped with %w so callers can still
// inspect them with errors.Is / errors.As.
func (agent EmbeddingAgent) GetEmbeddings(userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error {
	request := client.AgentRequestBody{
		Model:       "gpt-4.1-mini",
		Temperature: 0.3,
		ResponseFormat: client.ResponseFormat{
			Type: "text",
		},
		Chat: &client.Chat{
			Messages: make([]client.ChatMessage, 0),
		},
	}

	request.Chat.AddSystem(embeddingsAgentPropmt)
	request.Chat.AddImage(imageName, imageData, nil)

	resp, err := agent.client.Request(&request)
	if err != nil {
		return fmt.Errorf("requesting description for image %s: %w", imageId, err)
	}

	// A response without choices would make the index below panic; surface it
	// as an ordinary error instead.
	if len(resp.Choices) == 0 {
		return fmt.Errorf("describing image %s: response contained no choices", imageId)
	}

	description := resp.Choices[0].Message.Content

	// Prefer the agent's own logger; fall back to the package default when the
	// agent was built without one so logging never dereferences nil.
	logger := agent.log
	if logger == nil {
		logger = log.Default()
	}
	logger.Info(description)

	embeddings, err := agent.embeddings.Request(description)
	if err != nil {
		return fmt.Errorf("creating embeddings for image %s: %w", imageId, err)
	}

	if err := agent.imageModel.UpdateEmbedding(context.Background(), imageId, embeddings.Embeddings); err != nil {
		return fmt.Errorf("storing embedding for image %s: %w", imageId, err)
	}

	return nil
}

// NewEmbeddingsAgent builds an EmbeddingAgent that describes images with an
// agent client configured with the embeddings system prompt, creates
// embeddings with a fresh embeddings client, and writes results through
// imageModel.
//
// log is passed to the agent client and is also stored on the returned agent
// so its methods can log through the same logger.
func NewEmbeddingsAgent(log *log.Logger, imageModel models.ImageModel) EmbeddingAgent {
	return EmbeddingAgent{
		client: client.CreateAgentClient(client.CreateAgentClientOptions{
			SystemPrompt: embeddingsAgentPropmt,
			Log:          log,
		}),
		embeddings: client.CreateEmbeddingsClient(),
		imageModel: imageModel,
		// Previously omitted, which left the agent's logger nil.
		log: log,
	}
}