Haystack/backend/agents/embeddings_agent.go

69 lines
1.7 KiB
Go

package agents
import (
	"context"
	"fmt"

	"github.com/charmbracelet/log"
	"github.com/google/uuid"
	"screenmark/screenmark/agents/client"
	"screenmark/screenmark/models"
)
// embeddingsAgentPropmt is the system prompt used by the embeddings agent.
// It instructs the model to produce a detailed, content-focused description
// of an image; that description is the text the embeddings are created from.
const embeddingsAgentPropmt = `
You are an agent who's job it is to describe the contents of an image.
This description should be detailed as it will be used to create embeddings from the image.
You should focus more on the content of the image, rather than it's appearence.
`
// EmbeddingAgent describes an image with a chat model, converts that
// description into embeddings, and stores the result through the image model.
type EmbeddingAgent struct {
// embeddings turns a text description into embeddings.
embeddings client.EmbeddingsClient
// client sends the chat request that produces the image description.
client client.AgentClient
// imageModel persists the computed embedding for an image.
imageModel models.ImageModel
// log holds a logger for the agent.
log *log.Logger
}
// GetEmbeddings asks the chat model to describe the given image, converts
// that description into embeddings, and stores them on the image identified
// by imageId.
//
// userId is not used by this method; it is kept so the signature stays
// compatible with existing callers. imageName and imageData are attached to
// the chat request as the image to describe.
//
// It returns the error from the description request, the embeddings request,
// or the image-model update unchanged, and a new error when the model
// responds without any choices.
func (agent EmbeddingAgent) GetEmbeddings(userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error {
	request := client.AgentRequestBody{
		Model:       "gpt-4.1-mini",
		Temperature: 0.3,
		ResponseFormat: client.ResponseFormat{
			Type: "text",
		},
		Chat: &client.Chat{
			Messages: make([]client.ChatMessage, 0),
		},
	}
	request.Chat.AddSystem(embeddingsAgentPropmt)
	request.Chat.AddImage(imageName, imageData, nil)

	resp, err := agent.client.Request(&request)
	if err != nil {
		return err
	}

	// A successful response is not guaranteed to carry a choice; indexing
	// an empty list would panic, so report it as an error instead.
	if len(resp.Choices) == 0 {
		return fmt.Errorf("describing image %s: model returned no choices", imageId)
	}

	description := resp.Choices[0].Message.Content
	// Logged through the package-level logger.
	log.Info(description)

	embeddings, err := agent.embeddings.Request(description)
	if err != nil {
		return err
	}

	return agent.imageModel.UpdateEmbedding(context.Background(), imageId, embeddings.Embeddings)
}
// NewEmbeddingsAgent builds an EmbeddingAgent.
//
// The agent client is created with the embeddings system prompt and the
// given logger, and the embeddings client is created with its defaults.
// imageModel is used to persist computed embeddings. The logger is also
// stored on the returned agent so its log field is not left nil.
func NewEmbeddingsAgent(log *log.Logger, imageModel models.ImageModel) EmbeddingAgent {
	return EmbeddingAgent{
		client: client.CreateAgentClient(client.CreateAgentClientOptions{
			SystemPrompt: embeddingsAgentPropmt,
			Log:          log,
		}),
		embeddings: client.CreateEmbeddingsClient(),
		imageModel: imageModel,
		log:        log,
	}
}