Querying Models with an Image URL
You can provide images to vision models by referencing a publicly accessible HTTP URL. The examples below show the same request in cURL, Python (OpenAI SDK), Python (Gravix SDK), JavaScript (OpenAI SDK), and JavaScript (Gravix SDK).

cURL
curl -X POST https://api.gravixlayer.com/v1/inference/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $GRAVIXLAYER_API_KEY" \
  -d '{
    "model": "google/gemma-3-12b-it",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Can you describe this image?" },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://images.unsplash.com/photo-1720884413532-59289875c3e1?q=80&w=3024&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
            }
          }
        ]
      }
    ]
  }'
Python - OpenAI

import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api.gravixlayer.com/v1/inference",
    api_key=os.environ.get("GRAVIXLAYER_API_KEY"),
)

response = client.chat.completions.create(
    model="google/gemma-3-12b-it",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Can you describe this image?"},
            {
                "type": "image_url",
                "image_url": {
                    "url": "https://images.unsplash.com/photo-1720884413532-59289875c3e1?q=80&w=3024&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
                }
            }
        ],
    }],
)
print(response.choices[0].message.content)
Python - Gravix SDK

from gravixlayer import GravixLayer

# Make sure your API key is exported in the environment:
# export GRAVIXLAYER_API_KEY=your_api_key_here
client = GravixLayer()

response = client.chat.completions.create(
    model="google/gemma-3-12b-it",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Can you describe this image?"},
            {
                "type": "image_url",
                "image_url": {
                    "url": "https://images.unsplash.com/photo-1720884413532-59289875c3e1?q=80&w=3024&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
                }
            }
        ],
    }],
)
print(response.choices[0].message.content)
JavaScript

import { OpenAI } from 'openai';

const client = new OpenAI({
  baseURL: "https://api.gravixlayer.com/v1/inference",
  apiKey: process.env.GRAVIXLAYER_API_KEY,
});

async function main() {
  const response = await client.chat.completions.create({
    model: "google/gemma-3-12b-it",
    messages: [{
      role: "user",
      content: [
        { type: "text", text: "Can you describe this image?" },
        {
          type: "image_url",
          image_url: {
            url: "https://images.unsplash.com/photo-1720884413532-59289875c3e1?q=80&w=3024&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
          }
        }
      ],
    }],
  });
  console.log(response.choices[0].message.content);
}
main();
JavaScript - Gravix SDK

import { GravixLayer } from 'gravixlayer';

const client = new GravixLayer({
  apiKey: process.env.GRAVIXLAYER_API_KEY,
});

async function main() {
  const response = await client.chat.completions.create({
    model: 'google/gemma-3-12b-it',
    messages: [{
      role: 'user',
      content: [
        { type: 'text', text: 'Can you describe this image?' },
        {
          type: 'image_url',
          image_url: {
            url: 'https://images.unsplash.com/photo-1720884413532-59289875c3e1?q=80&w=3024'
          }
        }
      ]
    }]
  });
  console.log(response.choices[0].message.content);
}
main();
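Because the content array accepts a list of parts, a single request can include more than one image alongside the text prompt, assuming the endpoint follows the OpenAI-style multi-part content semantics used throughout these examples. A minimal Python sketch; the two image URLs are hypothetical placeholders:

import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api.gravixlayer.com/v1/inference",
    api_key=os.environ.get("GRAVIXLAYER_API_KEY"),
)

# Two image_url parts in one content array; the model receives both images
# together with the text prompt.
response = client.chat.completions.create(
    model="google/gemma-3-12b-it",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "What differs between these two images?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/first.jpg"}},   # placeholder URL
            {"type": "image_url", "image_url": {"url": "https://example.com/second.jpg"}},  # placeholder URL
        ],
    }],
)
print(response.choices[0].message.content)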
Querying Models with a Base64-Encoded Image
You can also provide images by embedding them directly in the request payload as a Base64-encoded string. The same five variants are shown below.
cURL

curl -X POST https://api.gravixlayer.com/v1/inference/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $GRAVIXLAYER_API_KEY" \
  -d '{
    "model": "google/gemma-3-12b-it",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Can you describe this image?" },
          {
            "type": "image_url",
            "image_url": {
              "url": "data:image/jpeg;base64,{base64_encoded_image}"
            }
          }
        ]
      }
    ]
  }'
Python - OpenAI

import base64
import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api.gravixlayer.com/v1/inference",
    api_key=os.environ.get("GRAVIXLAYER_API_KEY"),
)

def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

base64_image = encode_image("path/to/your/image.jpg")

response = client.chat.completions.create(
    model="google/gemma-3-12b-it",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Can you describe this image?"},
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}"
                }
            }
        ],
    }],
)
print(response.choices[0].message.content)
Python - Gravix SDK

import base64
from gravixlayer import GravixLayer

client = GravixLayer()

def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

base64_image = encode_image("path/to/your/image.jpg")

response = client.chat.completions.create(
    model="google/gemma-3-12b-it",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Can you describe this image?"},
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}"
                }
            }
        ],
    }],
)
print(response.choices[0].message.content)
JavaScript

import { OpenAI } from 'openai';
import fs from 'fs';

const client = new OpenAI({
  baseURL: "https://api.gravixlayer.com/v1/inference",
  apiKey: process.env.GRAVIXLAYER_API_KEY,
});

function encodeImage(imagePath) {
  const image = fs.readFileSync(imagePath);
  return Buffer.from(image).toString('base64');
}

async function main() {
  const base64Image = encodeImage('path/to/your/image.jpg');
  const response = await client.chat.completions.create({
    model: "google/gemma-3-12b-it",
    messages: [{
      role: "user",
      content: [
        { type: "text", text: "Can you describe this image?" },
        {
          type: "image_url",
          image_url: {
            url: `data:image/jpeg;base64,${base64Image}`
          }
        }
      ],
    }],
  });
  console.log(response.choices[0].message.content);
}
main();
JavaScript - Gravix SDK

import { GravixLayer } from 'gravixlayer';
import { readFileSync } from 'fs';

const client = new GravixLayer({
  apiKey: process.env.GRAVIXLAYER_API_KEY,
});

async function main() {
  const response = await client.chat.completions.create({
    model: 'google/gemma-3-12b-it',
    messages: [{
      role: 'user',
      content: [
        { type: 'text', text: 'Can you describe this image?' },
        {
          type: 'image_url',
          image_url: {
            url: `data:image/jpeg;base64,${readFileSync('./path/to/image.png').toString('base64')}`
          }
        }
      ]
    }]
  });
  console.log(response.choices[0].message.content);
}
main();
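The examples above hard-code image/jpeg in the data URL. If your images may be PNG, WebP, or another format, the media type should match the actual file. Here is a small helper sketch using Python's standard mimetypes module (not part of either SDK) that builds a correctly typed data URL:

import base64
import mimetypes

def to_data_url(image_path):
    # Guess the media type from the file extension; fall back to JPEG.
    mime, _ = mimetypes.guess_type(image_path)
    mime = mime or "image/jpeg"
    with open(image_path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime};base64,{encoded}"

# Pass the result as the image_url value in any of the examples above,
# e.g. to_data_url("photo.png") -> "data:image/png;base64,iVBOR..."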
Common applications for vision-language models (VLMs) include:
- Image Captioning: Automatically generating descriptive text for images.
- Visual Question Answering (VQA): Answering questions based on the content of an image.
- Document Analysis: Extracting and interpreting information from scanned documents or forms.
- Chart Interpretation: Analyzing data visualizations like graphs and charts.
- Optical Character Recognition (OCR): Extracting printed or handwritten text from images (see the sketch after this list).
- Content Moderation: Identifying and flagging inappropriate or sensitive visual content.
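As an illustration of the OCR use case, the request shape is identical to the earlier examples; only the prompt changes. A minimal Python sketch (the file path is a placeholder):

import base64
import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api.gravixlayer.com/v1/inference",
    api_key=os.environ.get("GRAVIXLAYER_API_KEY"),
)

# Placeholder path: substitute a scanned document or a photo containing text.
with open("path/to/receipt.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("utf-8")

response = client.chat.completions.create(
    model="google/gemma-3-12b-it",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Transcribe all text visible in this image."},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
        ],
    }],
)
print(response.choices[0].message.content)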

