Haystack/backend/agents/description_agent.go

package agents

import (
	"context"
	"fmt"
	"screenmark/screenmark/agents/client"
	"screenmark/screenmark/models"

	"github.com/charmbracelet/log"
	"github.com/google/uuid"
)

// noteAgentPrompt is the system prompt that instructs the model to describe
// an image and include any text visible in it, as simple unformatted text.
const noteAgentPrompt = `
You are an AI agent whose job is to describe the image you see.

You should also add any text you see in the image, if no text exists, just add a description.
Be concise and don't add too much extra information or formatting characters, simple text.
`

// DescriptionAgent produces a text description of an image through an agent
// client and stores the result using the image model.
type DescriptionAgent struct {
	// client sends the description request to the model.
	client client.AgentClient

	// imageModel saves the generated description for an image.
	imageModel models.ImageModel
}

// Describe asks the model for a plain-text description of an image and stores
// the answer as that image's description.
//
// noteAgentPrompt is added to the chat as the system message, followed by the
// image built from imageName and imageData. The content of the first returned
// choice is saved through imageModel.AddDescription under imageId.
//
// An error is returned when the request fails, when the response contains no
// choices, or when saving the description fails.
func (agent DescriptionAgent) Describe(log *log.Logger, imageId uuid.UUID, imageName string, imageData []byte) error {
	request := client.AgentRequestBody{
		Model:       "google/gemini-2.5-flash-lite-preview-06-17",
		Temperature: 0.3,
		ResponseFormat: client.ResponseFormat{
			Type: "text",
		},
		Chat: &client.Chat{
			Messages: make([]client.ChatMessage, 0),
		},
	}

	request.Chat.AddSystem(noteAgentPrompt)
	request.Chat.AddImage(imageName, imageData, nil)

	log.Debug("Sending description request")
	resp, err := agent.client.Request(&request)
	if err != nil {
		// %w keeps the underlying error reachable via errors.Is / errors.As.
		return fmt.Errorf("could not request image description: %w", err)
	}

	// Guard the index below: an empty choice list would otherwise panic.
	if len(resp.Choices) == 0 {
		return fmt.Errorf("description response for image %s contained no choices", imageId)
	}

	markdown := resp.Choices[0].Message.Content

	log.Debugf("Response %s", markdown)

	ctx := context.Background()

	if err = agent.imageModel.AddDescription(ctx, imageId, markdown); err != nil {
		return err
	}

	return nil
}

// NewDescriptionAgent returns a DescriptionAgent whose agent client is created
// with noteAgentPrompt as its system prompt and the given logger, and which
// stores descriptions through imageModel.
func NewDescriptionAgent(log *log.Logger, imageModel models.ImageModel) DescriptionAgent {
	options := client.CreateAgentClientOptions{
		SystemPrompt: noteAgentPrompt,
		Log:          log,
	}

	return DescriptionAgent{
		client:     client.CreateAgentClient(options),
		imageModel: imageModel,
	}
}