feat: finishing description agent infrastructure on backend
This commit is contained in:
@ -4,6 +4,7 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
@ -101,7 +102,7 @@ func CreateAgentClient(options CreateAgentClientOptions) AgentClient {
|
||||
|
||||
return AgentClient{
|
||||
apiKey: apiKey,
|
||||
url: "https://api.openai.com/v1/chat/completions",
|
||||
url: "https://router.requesty.ai/v1/chat/completions",
|
||||
Do: func(req *http.Request) (*http.Response, error) {
|
||||
client := &http.Client{}
|
||||
return client.Do(req)
|
||||
@ -132,29 +133,29 @@ func (client AgentClient) getRequest(body []byte) (*http.Request, error) {
|
||||
func (client AgentClient) Request(req *AgentRequestBody) (AgentResponse, error) {
|
||||
jsonAiRequest, err := json.Marshal(req)
|
||||
if err != nil {
|
||||
return AgentResponse{}, err
|
||||
return AgentResponse{}, fmt.Errorf("Could not format JSON", err)
|
||||
}
|
||||
|
||||
httpRequest, err := client.getRequest(jsonAiRequest)
|
||||
if err != nil {
|
||||
return AgentResponse{}, err
|
||||
return AgentResponse{}, fmt.Errorf("Could not get request", err)
|
||||
}
|
||||
|
||||
resp, err := client.Do(httpRequest)
|
||||
if err != nil {
|
||||
return AgentResponse{}, err
|
||||
return AgentResponse{}, fmt.Errorf("Could not send request", err)
|
||||
}
|
||||
|
||||
response, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return AgentResponse{}, err
|
||||
return AgentResponse{}, fmt.Errorf("Could not read body", err)
|
||||
}
|
||||
|
||||
agentResponse := AgentResponse{}
|
||||
err = json.Unmarshal(response, &agentResponse)
|
||||
|
||||
if err != nil {
|
||||
return AgentResponse{}, err
|
||||
return AgentResponse{}, fmt.Errorf("Could not unmarshal response, response: %s", string(response), err)
|
||||
}
|
||||
|
||||
if len(agentResponse.Choices) != 1 {
|
||||
@ -245,7 +246,7 @@ func (client *AgentClient) RunAgent(userId uuid.UUID, imageId uuid.UUID, imageNa
|
||||
request := AgentRequestBody{
|
||||
Tools: &tools,
|
||||
ToolChoice: &toolChoice,
|
||||
Model: "gpt-4.1-mini",
|
||||
Model: "google/gemini-2.5-flash",
|
||||
RandomSeed: &seed,
|
||||
Temperature: 0.3,
|
||||
EndToolCall: client.Options.EndToolCall,
|
||||
|
@ -2,6 +2,7 @@ package agents
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"screenmark/screenmark/agents/client"
|
||||
"screenmark/screenmark/models"
|
||||
|
||||
@ -10,15 +11,10 @@ import (
|
||||
)
|
||||
|
||||
const noteAgentPrompt = `
|
||||
You are a helpful agent, who's job is to extract notes from images.
|
||||
Not all images contain notes, in such cases there's not need to create them.
|
||||
You are an AI agent who's job is to describe the image you see.
|
||||
|
||||
An image can have more than one note.
|
||||
|
||||
You must return markdown, and adapt the text to best fit markdown.
|
||||
Do not return anything except markdown.
|
||||
|
||||
If the image contains code, add this inside code blocks. You must try and correctly guess the language too.
|
||||
You should also add any text you see in the image, if no text exists, just add a description.
|
||||
Be consise and don't add too much extra information or formatting characters, simple text.
|
||||
`
|
||||
|
||||
type DescriptionAgent struct {
|
||||
@ -27,9 +23,9 @@ type DescriptionAgent struct {
|
||||
imageModel models.ImageModel
|
||||
}
|
||||
|
||||
func (agent DescriptionAgent) Describe(imageId uuid.UUID, imageName string, imageData []byte) error {
|
||||
func (agent DescriptionAgent) Describe(log *log.Logger, imageId uuid.UUID, imageName string, imageData []byte) error {
|
||||
request := client.AgentRequestBody{
|
||||
Model: "gpt-4.1-nano",
|
||||
Model: "google/gemini-2.5-flash-lite-preview-06-17",
|
||||
Temperature: 0.3,
|
||||
ResponseFormat: client.ResponseFormat{
|
||||
Type: "text",
|
||||
@ -42,15 +38,18 @@ func (agent DescriptionAgent) Describe(imageId uuid.UUID, imageName string, imag
|
||||
request.Chat.AddSystem(noteAgentPrompt)
|
||||
request.Chat.AddImage(imageName, imageData, nil)
|
||||
|
||||
log.Debug("Sending description request")
|
||||
resp, err := agent.client.Request(&request)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("Could not request", err)
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
markdown := resp.Choices[0].Message.Content
|
||||
|
||||
log.Debugf("Response %s", markdown)
|
||||
|
||||
err = agent.imageModel.AddDescription(ctx, imageId, markdown)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -72,7 +72,11 @@ func ListenNewImageEvents(db *sql.DB, notifier *Notifier[Notification]) {
|
||||
}
|
||||
|
||||
descriptionAgent := agents.NewDescriptionAgent(createLogger("Description 📝", splitWriter), imageModel)
|
||||
descriptionAgent.Describe(image.Image.ID, image.Image.ImageName, image.Image.Image)
|
||||
err = descriptionAgent.Describe(createLogger("Description 📓", splitWriter), image.Image.ID, image.Image.ImageName, image.Image.Image)
|
||||
|
||||
if err != nil {
|
||||
log.Error(err)
|
||||
}
|
||||
|
||||
listAgent := agents.NewListAgent(createLogger("Lists 🖋️", splitWriter), listModel)
|
||||
listAgent.RunAgent(image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image)
|
||||
|
Reference in New Issue
Block a user