Haystack/backend/agents/description_agent.go
John Costa 59bf884f5d refactor: changing notes to be a simple image description
Notes would generate too often and not be very useful. This is much
better.
2025-07-24 13:59:24 +01:00

75 lines
1.7 KiB
Go

package agents
import (
	"context"
	"fmt"

	"screenmark/screenmark/agents/client"
	"screenmark/screenmark/models"

	"github.com/charmbracelet/log"
	"github.com/google/uuid"
)
// noteAgentPrompt is the system prompt used by DescriptionAgent. It tells the
// model to extract notes from an image, to answer with markdown only, and to
// put any code inside code blocks with a guessed language.
//
// NOTE(review): the text contains typos ("who's job", "there's not need").
// They are left as-is here because editing the prompt changes what is sent to
// the model; fix deliberately and re-check the output quality.
const noteAgentPrompt = `
You are a helpful agent, who's job is to extract notes from images.
Not all images contain notes, in such cases there's not need to create them.
An image can have more than one note.
You must return markdown, and adapt the text to best fit markdown.
Do not return anything except markdown.
If the image contains code, add this inside code blocks. You must try and correctly guess the language too.
`
// DescriptionAgent requests a markdown description of an image from an agent
// client and stores the result through the image model.
type DescriptionAgent struct {
// client performs the request to the model.
client client.AgentClient
// imageModel receives the generated description via AddDescription.
imageModel models.ImageModel
}
// Describe sends the image to the model together with noteAgentPrompt and
// stores the markdown from the first returned choice as the description of
// imageId.
//
// imageName and imageData are forwarded unchanged to the chat as the image
// message. An error is returned when the agent request fails, when the
// response carries no choices, or when the description cannot be stored.
func (agent DescriptionAgent) Describe(imageId uuid.UUID, imageName string, imageData []byte) error {
	request := client.AgentRequestBody{
		Model:       "gpt-4.1-nano",
		Temperature: 0.3,
		ResponseFormat: client.ResponseFormat{
			Type: "text",
		},
		Chat: &client.Chat{
			Messages: make([]client.ChatMessage, 0),
		},
	}

	request.Chat.AddSystem(noteAgentPrompt)
	request.Chat.AddImage(imageName, imageData, nil)

	resp, err := agent.client.Request(&request)
	if err != nil {
		return fmt.Errorf("requesting description for image %s: %w", imageId, err)
	}

	// Indexing Choices[0] on an empty list would panic, so report it as an
	// error instead of crashing the caller.
	if len(resp.Choices) == 0 {
		return fmt.Errorf("describing image %s: response contained no choices", imageId)
	}
	markdown := resp.Choices[0].Message.Content

	// The method takes no context, so a background context is used for storage.
	err = agent.imageModel.AddDescription(context.Background(), imageId, markdown)
	if err != nil {
		return fmt.Errorf("storing description for image %s: %w", imageId, err)
	}

	return nil
}
// NewDescriptionAgent builds a DescriptionAgent whose agent client is created
// with noteAgentPrompt as its system prompt and the supplied logger, and which
// stores descriptions through imageModel.
func NewDescriptionAgent(log *log.Logger, imageModel models.ImageModel) DescriptionAgent {
	options := client.CreateAgentClientOptions{
		SystemPrompt: noteAgentPrompt,
		Log:          log,
	}

	return DescriptionAgent{
		client:     client.CreateAgentClient(options),
		imageModel: imageModel,
	}
}