// Package gemini is a small client for the Gemini generateContent REST
// endpoint. It supports plain text chat and image generation.
package gemini

import (
	"bytes"
	"context"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"time"
)

const (
	// BaseURL is the prefix under which every model endpoint is addressed.
	BaseURL = "https://generativelanguage.googleapis.com/v1beta/models"

	// Model identifiers that callers can pass as the model argument.
	ImageModel   = "gemini-2.0-flash-exp"
	ImageModelV2 = "gemini-2.5-flash-preview-image-generation"
	TextModel    = "gemini-2.5-flash"
)

// requestTimeout bounds a single HTTP round trip (not the whole retry cycle).
const requestTimeout = 180 * time.Second

// fallbackHTTP serves Clients that were built as a struct literal instead of
// through New, so their unexported http field is nil.
var fallbackHTTP = &http.Client{Timeout: requestTimeout}

// Client talks to the Gemini API using a fixed API key.
type Client struct {
	APIKey string
	http   *http.Client
}

// part is one piece of a content message: either text or inline binary data.
type part struct {
	Text       string      `json:"text,omitempty"`
	InlineData *inlineData `json:"inlineData,omitempty"`
}

// inlineData carries a base64-encoded payload together with its MIME type.
type inlineData struct {
	MIMEType string `json:"mimeType"`
	Data     string `json:"data"` // base64
}

// content is an ordered list of parts forming one message.
type content struct {
	Parts []part `json:"parts"`
}

// generateRequest is the request body used for image generation.
type generateRequest struct {
	Contents         []content        `json:"contents"`
	GenerationConfig generationConfig `json:"generationConfig"`
}

// imagenConfig holds image-specific generation options.
//
// NOTE(review): the public Gemini documentation appears to name this field
// "imageConfig" rather than "imagenConfig" — confirm against the API.
type imagenConfig struct {
	AspectRatio string `json:"aspectRatio,omitempty"`
}

// generationConfig selects the response modalities and optional image options.
type generationConfig struct {
	ResponseModalities []string      `json:"responseModalities"`
	ImagenConfig       *imagenConfig `json:"imagenConfig,omitempty"`
}

// generateResponse is the subset of the generateContent response this package
// reads: candidate parts and the optional error object.
type generateResponse struct {
	Candidates []struct {
		Content struct {
			Parts []struct {
				Text       string      `json:"text,omitempty"`
				InlineData *inlineData `json:"inlineData,omitempty"`
			} `json:"parts"`
		} `json:"content"`
	} `json:"candidates"`
	Error *struct {
		Message string `json:"message"`
	} `json:"error,omitempty"`
}

// New returns a Client that authenticates with apiKey and applies a
// 180-second timeout to every HTTP request.
func New(apiKey string) *Client {
	return &Client{
		APIKey: apiKey,
		http:   &http.Client{Timeout: requestTimeout},
	}
}

// httpClient returns the configured HTTP client, or the shared fallback when
// the Client was not created through New.
func (c *Client) httpClient() *http.Client {
	if c.http != nil {
		return c.http
	}
	return fallbackHTTP
}

// Chat sends a system prompt plus a user message and returns the first
// non-empty text part of the first candidate.
//
// model is a model identifier such as TextModel. When systemPrompt is empty
// the system instruction is omitted from the request. Transient failures
// (transport errors, HTTP 429 and 5xx) are retried after 2s, 4s and 8s.
func (c *Client) Chat(ctx context.Context, model, systemPrompt, userMsg string) (string, error) {
	type chatContent struct {
		Role  string `json:"role"`
		Parts []part `json:"parts"`
	}
	type chatRequest struct {
		SystemInstruction *chatContent     `json:"system_instruction,omitempty"`
		Contents          []chatContent    `json:"contents"`
		GenerationConfig  generationConfig `json:"generationConfig"`
	}

	req := chatRequest{
		Contents: []chatContent{
			{Role: "user", Parts: []part{{Text: userMsg}}},
		},
		GenerationConfig: generationConfig{
			ResponseModalities: []string{"TEXT"},
		},
	}
	// An empty prompt would marshal to a part with no fields, so leave the
	// instruction out entirely in that case.
	if systemPrompt != "" {
		req.SystemInstruction = &chatContent{
			Parts: []part{{Text: systemPrompt}},
		}
	}

	data, err := json.Marshal(req)
	if err != nil {
		return "", fmt.Errorf("serializar request: %w", err)
	}

	delays := []time.Duration{2 * time.Second, 4 * time.Second, 8 * time.Second}
	gr, body, err := c.post(ctx, model, data, delays)
	if err != nil {
		return "", err
	}

	if len(gr.Candidates) == 0 || len(gr.Candidates[0].Content.Parts) == 0 {
		return "", fmt.Errorf("gemini: resposta vazia (body: %s)", string(body))
	}
	for _, p := range gr.Candidates[0].Content.Parts {
		if p.Text != "" {
			return p.Text, nil
		}
	}
	return "", fmt.Errorf("gemini: sem texto na resposta")
}

// GenerateImage sends a text prompt and returns the decoded bytes of the
// first inline image in the response. The MIME type is not inspected.
func (c *Client) GenerateImage(ctx context.Context, model, prompt string) ([]byte, error) {
	return c.generate(ctx, model, prompt, generationConfig{
		ResponseModalities: []string{"IMAGE"},
	})
}

// GenerateImageSquare behaves like GenerateImage but requests a 1:1 aspect
// ratio through imagenConfig.
// Use with gemini-2.5-flash-preview-image-generation.
func (c *Client) GenerateImageSquare(ctx context.Context, model, prompt string) ([]byte, error) {
	return c.generate(ctx, model, prompt, generationConfig{
		ResponseModalities: []string{"IMAGE"},
		ImagenConfig:       &imagenConfig{AspectRatio: "1:1"},
	})
}

// generate posts prompt with the given generation config and returns the
// base64-decoded payload of the first inlineData part of the first candidate.
// Transient failures are retried after 1s, 2s and 4s.
func (c *Client) generate(ctx context.Context, model, prompt string, cfg generationConfig) ([]byte, error) {
	data, err := json.Marshal(generateRequest{
		Contents: []content{
			{Parts: []part{{Text: prompt}}},
		},
		GenerationConfig: cfg,
	})
	if err != nil {
		return nil, fmt.Errorf("serializar request: %w", err)
	}

	delays := []time.Duration{1 * time.Second, 2 * time.Second, 4 * time.Second}
	gr, _, err := c.post(ctx, model, data, delays)
	if err != nil {
		return nil, err
	}

	if len(gr.Candidates) == 0 {
		return nil, fmt.Errorf("gemini: sem candidatos na resposta")
	}
	for _, p := range gr.Candidates[0].Content.Parts {
		if p.InlineData == nil {
			continue
		}
		imgBytes, err := base64.StdEncoding.DecodeString(p.InlineData.Data)
		if err != nil {
			return nil, fmt.Errorf("decodificar imagem base64: %w", err)
		}
		return imgBytes, nil
	}
	return nil, fmt.Errorf("gemini: nenhuma imagem na resposta")
}

// post sends payload to the model's generateContent endpoint, making one
// initial attempt plus one retry per entry in delays. It returns the decoded
// response and the raw body of the successful attempt.
func (c *Client) post(ctx context.Context, model string, payload []byte, delays []time.Duration) (*generateResponse, []byte, error) {
	// The API key travels in a header (see postOnce), never in the URL:
	// transport errors embed the request URL and would leak it.
	endpoint := fmt.Sprintf("%s/%s:generateContent", BaseURL, model)

	var lastErr error
	for attempt := 0; attempt <= len(delays); attempt++ {
		if attempt > 0 {
			if err := wait(ctx, delays[attempt-1]); err != nil {
				return nil, nil, err
			}
		}
		gr, body, retry, err := c.postOnce(ctx, endpoint, payload)
		if err == nil {
			return gr, body, nil
		}
		if !retry {
			return nil, nil, err
		}
		lastErr = err
	}
	return nil, nil, fmt.Errorf("gemini: falha após %d tentativas: %w", len(delays)+1, lastErr)
}

// postOnce performs a single POST. The boolean result reports whether the
// returned error is transient and the request is worth retrying.
func (c *Client) postOnce(ctx context.Context, endpoint string, payload []byte) (*generateResponse, []byte, bool, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return nil, nil, false, fmt.Errorf("criar request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	if c.APIKey != "" {
		req.Header.Set("x-goog-api-key", c.APIKey)
	}

	resp, err := c.httpClient().Do(req)
	if err != nil {
		// A cancelled or expired context will fail every further attempt.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, nil, false, ctxErr
		}
		return nil, nil, true, fmt.Errorf("HTTP: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, nil, true, fmt.Errorf("ler resposta: %w", err)
	}
	if resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode >= 500 {
		return nil, nil, true, fmt.Errorf("status %d: %s", resp.StatusCode, string(body))
	}

	var gr generateResponse
	parseErr := json.Unmarshal(body, &gr)
	if parseErr == nil && gr.Error != nil {
		return nil, nil, false, fmt.Errorf("gemini: %s", gr.Error.Message)
	}
	// Any other non-2xx answer is a failure even without a JSON error object.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil, nil, false, fmt.Errorf("status %d: %s", resp.StatusCode, string(body))
	}
	if parseErr != nil {
		return nil, nil, false, fmt.Errorf("parsear resposta: %w (body: %s)", parseErr, string(body))
	}
	return &gr, body, false, nil
}

// wait blocks for d or until ctx is done, whichever comes first, and returns
// ctx.Err() in the latter case.
func wait(ctx context.Context, d time.Duration) error {
	timer := time.NewTimer(d)
	defer timer.Stop()
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-timer.C:
		return nil
	}
}