Ripped out literally everything to simplify the backend as much as possible. Some of the code was so horrifically complicated it's insaneeee
80 lines
1.8 KiB
Go
80 lines
1.8 KiB
Go
package agents
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"screenmark/screenmark/.gen/haystack/haystack/model"
|
|
"screenmark/screenmark/agents/client"
|
|
"screenmark/screenmark/models"
|
|
|
|
"github.com/charmbracelet/log"
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// noteAgentPrompt is the system prompt given to the description agent. It
// instructs the model to describe the supplied image, include any text found
// in it, and answer in concise, organised Markdown.
const noteAgentPrompt = `
You are an AI agent who's job is to describe the image you see.

You should also add any text you see in the image, if no text exists, just add a description.
Be consise and don't add too much extra information or formatting characters, simple text.

You must write this text in Markdown. You can add extra information for the user.
You must organise this text nicely, not be all over the place.
`
|
|
|
|
type DescriptionAgent struct {
|
|
client client.AgentClient
|
|
|
|
imageModel models.ImageModel
|
|
}
|
|
|
|
func (agent DescriptionAgent) Describe(log *log.Logger, imageID uuid.UUID, imageName string, imageData []byte) error {
|
|
request := client.AgentRequestBody{
|
|
Model: "policy/images",
|
|
Temperature: 0.3,
|
|
ResponseFormat: client.ResponseFormat{
|
|
Type: "text",
|
|
},
|
|
Chat: &client.Chat{
|
|
Messages: make([]client.ChatMessage, 0),
|
|
},
|
|
}
|
|
|
|
request.Chat.AddSystem(noteAgentPrompt)
|
|
request.Chat.AddImage(imageName, imageData, nil)
|
|
|
|
log.Debug("Sending description request")
|
|
resp, err := agent.client.Request(&request)
|
|
if err != nil {
|
|
return fmt.Errorf("Could not request. %s", err)
|
|
}
|
|
|
|
ctx := context.Background()
|
|
|
|
markdown := resp.Choices[0].Message.Content
|
|
|
|
_, err = agent.imageModel.Update(ctx, model.Image{
|
|
ID: imageID,
|
|
Description: markdown,
|
|
})
|
|
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func NewDescriptionAgent(log *log.Logger, imageModel models.ImageModel) DescriptionAgent {
|
|
client := client.CreateAgentClient(client.CreateAgentClientOptions{
|
|
SystemPrompt: noteAgentPrompt,
|
|
Log: log,
|
|
})
|
|
|
|
agent := DescriptionAgent{
|
|
client: client,
|
|
imageModel: imageModel,
|
|
}
|
|
|
|
return agent
|
|
}
|