74 lines
1.7 KiB
Go
74 lines
1.7 KiB
Go
package agents
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"screenmark/screenmark/agents/client"
|
|
"screenmark/screenmark/models"
|
|
|
|
"github.com/charmbracelet/log"
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// noteAgentPrompt is the system prompt given to the description agent. It asks
// the model for a short, plain-text description of the image, including any
// text visible in it.
const noteAgentPrompt = `
You are an AI agent whose job is to describe the image you see.

You should also add any text you see in the image, if no text exists, just add a description.
Be concise and don't add too much extra information or formatting characters, simple text.
`
|
|
|
|
type DescriptionAgent struct {
|
|
client client.AgentClient
|
|
|
|
imageModel models.ImageModel
|
|
}
|
|
|
|
func (agent DescriptionAgent) Describe(log *log.Logger, imageId uuid.UUID, imageName string, imageData []byte) error {
|
|
request := client.AgentRequestBody{
|
|
Model: "google/gemini-2.5-flash-lite-preview-06-17",
|
|
Temperature: 0.3,
|
|
ResponseFormat: client.ResponseFormat{
|
|
Type: "text",
|
|
},
|
|
Chat: &client.Chat{
|
|
Messages: make([]client.ChatMessage, 0),
|
|
},
|
|
}
|
|
|
|
request.Chat.AddSystem(noteAgentPrompt)
|
|
request.Chat.AddImage(imageName, imageData, nil)
|
|
|
|
log.Debug("Sending description request")
|
|
resp, err := agent.client.Request(&request)
|
|
if err != nil {
|
|
return fmt.Errorf("Could not request", err)
|
|
}
|
|
|
|
ctx := context.Background()
|
|
|
|
markdown := resp.Choices[0].Message.Content
|
|
|
|
log.Debugf("Response %s", markdown)
|
|
|
|
err = agent.imageModel.AddDescription(ctx, imageId, markdown)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func NewDescriptionAgent(log *log.Logger, imageModel models.ImageModel) DescriptionAgent {
|
|
client := client.CreateAgentClient(client.CreateAgentClientOptions{
|
|
SystemPrompt: noteAgentPrompt,
|
|
Log: log,
|
|
})
|
|
|
|
agent := DescriptionAgent{
|
|
client: client,
|
|
imageModel: imageModel,
|
|
}
|
|
|
|
return agent
|
|
}
|