feat: finishing description agent infrastructure on backend
This commit is contained in:
@ -4,6 +4,7 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
@ -101,7 +102,7 @@ func CreateAgentClient(options CreateAgentClientOptions) AgentClient {
|
|||||||
|
|
||||||
return AgentClient{
|
return AgentClient{
|
||||||
apiKey: apiKey,
|
apiKey: apiKey,
|
||||||
url: "https://api.openai.com/v1/chat/completions",
|
url: "https://router.requesty.ai/v1/chat/completions",
|
||||||
Do: func(req *http.Request) (*http.Response, error) {
|
Do: func(req *http.Request) (*http.Response, error) {
|
||||||
client := &http.Client{}
|
client := &http.Client{}
|
||||||
return client.Do(req)
|
return client.Do(req)
|
||||||
@ -132,29 +133,29 @@ func (client AgentClient) getRequest(body []byte) (*http.Request, error) {
|
|||||||
func (client AgentClient) Request(req *AgentRequestBody) (AgentResponse, error) {
|
func (client AgentClient) Request(req *AgentRequestBody) (AgentResponse, error) {
|
||||||
jsonAiRequest, err := json.Marshal(req)
|
jsonAiRequest, err := json.Marshal(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return AgentResponse{}, err
|
return AgentResponse{}, fmt.Errorf("Could not format JSON", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
httpRequest, err := client.getRequest(jsonAiRequest)
|
httpRequest, err := client.getRequest(jsonAiRequest)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return AgentResponse{}, err
|
return AgentResponse{}, fmt.Errorf("Could not get request", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
resp, err := client.Do(httpRequest)
|
resp, err := client.Do(httpRequest)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return AgentResponse{}, err
|
return AgentResponse{}, fmt.Errorf("Could not send request", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := io.ReadAll(resp.Body)
|
response, err := io.ReadAll(resp.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return AgentResponse{}, err
|
return AgentResponse{}, fmt.Errorf("Could not read body", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
agentResponse := AgentResponse{}
|
agentResponse := AgentResponse{}
|
||||||
err = json.Unmarshal(response, &agentResponse)
|
err = json.Unmarshal(response, &agentResponse)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return AgentResponse{}, err
|
return AgentResponse{}, fmt.Errorf("Could not unmarshal response, response: %s", string(response), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(agentResponse.Choices) != 1 {
|
if len(agentResponse.Choices) != 1 {
|
||||||
@ -245,7 +246,7 @@ func (client *AgentClient) RunAgent(userId uuid.UUID, imageId uuid.UUID, imageNa
|
|||||||
request := AgentRequestBody{
|
request := AgentRequestBody{
|
||||||
Tools: &tools,
|
Tools: &tools,
|
||||||
ToolChoice: &toolChoice,
|
ToolChoice: &toolChoice,
|
||||||
Model: "gpt-4.1-mini",
|
Model: "google/gemini-2.5-flash",
|
||||||
RandomSeed: &seed,
|
RandomSeed: &seed,
|
||||||
Temperature: 0.3,
|
Temperature: 0.3,
|
||||||
EndToolCall: client.Options.EndToolCall,
|
EndToolCall: client.Options.EndToolCall,
|
||||||
|
@ -2,6 +2,7 @@ package agents
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
"screenmark/screenmark/agents/client"
|
"screenmark/screenmark/agents/client"
|
||||||
"screenmark/screenmark/models"
|
"screenmark/screenmark/models"
|
||||||
|
|
||||||
@ -10,15 +11,10 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const noteAgentPrompt = `
|
const noteAgentPrompt = `
|
||||||
You are a helpful agent, who's job is to extract notes from images.
|
You are an AI agent who's job is to describe the image you see.
|
||||||
Not all images contain notes, in such cases there's not need to create them.
|
|
||||||
|
|
||||||
An image can have more than one note.
|
You should also add any text you see in the image, if no text exists, just add a description.
|
||||||
|
Be consise and don't add too much extra information or formatting characters, simple text.
|
||||||
You must return markdown, and adapt the text to best fit markdown.
|
|
||||||
Do not return anything except markdown.
|
|
||||||
|
|
||||||
If the image contains code, add this inside code blocks. You must try and correctly guess the language too.
|
|
||||||
`
|
`
|
||||||
|
|
||||||
type DescriptionAgent struct {
|
type DescriptionAgent struct {
|
||||||
@ -27,9 +23,9 @@ type DescriptionAgent struct {
|
|||||||
imageModel models.ImageModel
|
imageModel models.ImageModel
|
||||||
}
|
}
|
||||||
|
|
||||||
func (agent DescriptionAgent) Describe(imageId uuid.UUID, imageName string, imageData []byte) error {
|
func (agent DescriptionAgent) Describe(log *log.Logger, imageId uuid.UUID, imageName string, imageData []byte) error {
|
||||||
request := client.AgentRequestBody{
|
request := client.AgentRequestBody{
|
||||||
Model: "gpt-4.1-nano",
|
Model: "google/gemini-2.5-flash-lite-preview-06-17",
|
||||||
Temperature: 0.3,
|
Temperature: 0.3,
|
||||||
ResponseFormat: client.ResponseFormat{
|
ResponseFormat: client.ResponseFormat{
|
||||||
Type: "text",
|
Type: "text",
|
||||||
@ -42,15 +38,18 @@ func (agent DescriptionAgent) Describe(imageId uuid.UUID, imageName string, imag
|
|||||||
request.Chat.AddSystem(noteAgentPrompt)
|
request.Chat.AddSystem(noteAgentPrompt)
|
||||||
request.Chat.AddImage(imageName, imageData, nil)
|
request.Chat.AddImage(imageName, imageData, nil)
|
||||||
|
|
||||||
|
log.Debug("Sending description request")
|
||||||
resp, err := agent.client.Request(&request)
|
resp, err := agent.client.Request(&request)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return fmt.Errorf("Could not request", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
|
||||||
markdown := resp.Choices[0].Message.Content
|
markdown := resp.Choices[0].Message.Content
|
||||||
|
|
||||||
|
log.Debugf("Response %s", markdown)
|
||||||
|
|
||||||
err = agent.imageModel.AddDescription(ctx, imageId, markdown)
|
err = agent.imageModel.AddDescription(ctx, imageId, markdown)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -72,7 +72,11 @@ func ListenNewImageEvents(db *sql.DB, notifier *Notifier[Notification]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
descriptionAgent := agents.NewDescriptionAgent(createLogger("Description 📝", splitWriter), imageModel)
|
descriptionAgent := agents.NewDescriptionAgent(createLogger("Description 📝", splitWriter), imageModel)
|
||||||
descriptionAgent.Describe(image.Image.ID, image.Image.ImageName, image.Image.Image)
|
err = descriptionAgent.Describe(createLogger("Description 📓", splitWriter), image.Image.ID, image.Image.ImageName, image.Image.Image)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
listAgent := agents.NewListAgent(createLogger("Lists 🖋️", splitWriter), listModel)
|
listAgent := agents.NewListAgent(createLogger("Lists 🖋️", splitWriter), listModel)
|
||||||
listAgent.RunAgent(image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image)
|
listAgent.RunAgent(image.UserID, image.ImageID, image.Image.ImageName, image.Image.Image)
|
||||||
|
Reference in New Issue
Block a user