// Package agents contains model-backed agents that derive structured data
// from user images.
package agents

import (
	"context"
	"fmt"

	"screenmark/screenmark/.gen/haystack/haystack/model"
	"screenmark/screenmark/agents/client"
	"screenmark/screenmark/models"

	"github.com/charmbracelet/log"
	"github.com/google/uuid"
)

// noteAgentPrompt is the system prompt sent with every note-extraction
// request. It is runtime text consumed by the model, so it is kept verbatim.
const noteAgentPrompt = ` You are a helpful agent, who's job is to extract notes from images. Not all images contain notes, in such cases there's not need to create them. An image can have more than one note. You must return markdown, and adapt the text to best fit markdown. Do not return anything except markdown. If the image contains code, add this inside code blocks. You must try and correctly guess the language too. `

// NoteAgent extracts markdown notes from images through an agent client and
// persists them through a note model.
type NoteAgent struct {
	client client.AgentClient

	noteModel models.NoteModel
}

// GetNotes sends the image to the model together with the note-extraction
// system prompt, stores the returned text as a note for userId, and links that
// note to imageId.
//
// It returns an error if the request fails, if the response contains no
// choices, or if either persistence step fails. Underlying errors are wrapped
// with %w so callers can still inspect them with errors.Is / errors.As.
func (agent NoteAgent) GetNotes(userId uuid.UUID, imageId uuid.UUID, imageName string, imageData []byte) error {
	request := client.AgentRequestBody{
		Model:       "pixtral-12b-2409",
		Temperature: 0.3,
		ResponseFormat: client.ResponseFormat{
			Type: "text",
		},
		Chat: &client.Chat{
			Messages: make([]client.ChatMessage, 0),
		},
	}

	request.Chat.AddSystem(noteAgentPrompt)
	request.Chat.AddImage(imageName, imageData, nil)

	resp, err := agent.client.Request(&request)
	if err != nil {
		return fmt.Errorf("requesting notes for image %q: %w", imageName, err)
	}

	// Indexing Choices[0] on an empty response would panic, so reject it
	// explicitly instead.
	if len(resp.Choices) == 0 {
		return fmt.Errorf("requesting notes for image %q: response contained no choices", imageName)
	}

	markdown := resp.Choices[0].Message.Content

	// No caller context is available through this signature, so persistence
	// runs on a background context.
	ctx := context.Background()

	note, err := agent.noteModel.Save(ctx, userId, model.Notes{
		Name:    "the note", // TODO: add some json schema
		Content: markdown,
	})
	if err != nil {
		return fmt.Errorf("saving note: %w", err)
	}

	if _, err := agent.noteModel.SaveToImage(ctx, imageId, note.ID); err != nil {
		return fmt.Errorf("linking note to image: %w", err)
	}

	return nil
}

// NewNoteAgent builds a NoteAgent that uses a freshly created agent client
// (configured with the given logger) and the supplied note model.
//
// It returns the zero NoteAgent and the underlying error if the agent client
// cannot be created.
func NewNoteAgent(log *log.Logger, noteModel models.NoteModel) (NoteAgent, error) {
	// Named agentClient rather than client so the imported client package is
	// not shadowed inside this function.
	agentClient, err := client.CreateAgentClient(log)
	if err != nil {
		return NoteAgent{}, err
	}

	return NoteAgent{
		client:    agentClient,
		noteModel: noteModel,
	}, nil
}