Chat Completions
Creates a chat completion for the provided messages.
POST /v1/inference/chat/completions
Generate a chat-based completion (OpenAI-compatible, supports streaming). The examples below make this API request with a cURL command from your terminal, and with the OpenAI Python and JavaScript SDKs:
Request Headers
Content-Type: application/json
Authorization: Bearer GRAVIXLAYER_API_KEY
Example Usage
- cURL
- Python
- JavaScript
chat-completions.sh
# Send a single-turn chat request; the API key is read from $GRAVIXLAYER_API_KEY.
curl --request POST "https://api.gravixlayer.com/v1/inference/chat/completions" \
  --header "Content-Type: application/json" \
  --header "Authorization: Bearer $GRAVIXLAYER_API_KEY" \
  --data '{
"model": "llama3.1:8b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
Response:
{
"id": "chatcmpl-638",
"object": "chat.completion",
"created": 1752093069,
"model": "llama3.1:8b-instruct-fp16",
"system_fingerprint": "fp_ollama",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! How can I assist you today?"
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 11,
"completion_tokens": 10,
"total_tokens": 21
}
}
chat-completions.py
import os
import json
from openai import OpenAI
# Point the OpenAI SDK at the Gravix Layer inference endpoint; the API key is
# read from the GRAVIXLAYER_API_KEY environment variable.
client = OpenAI(
    base_url="https://api.gravixlayer.com/v1/inference",
    api_key=os.environ.get("GRAVIXLAYER_API_KEY"),
)

# Single-turn conversation: one user message.
messages = [{"role": "user", "content": "Hello!"}]
completion = client.chat.completions.create(
    model="llama3.1:8b-instruct-fp16",
    messages=messages,
)

# Dump the whole response object as pretty-printed JSON.
print(json.dumps(completion.model_dump(), indent=2))
Response:
{
"id": "chatcmpl-315",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! It's nice to meet you. Is there something I can help you with or would you like to chat?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1752096407,
"model": "llama3.1:8b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 25,
"prompt_tokens": 12,
"total_tokens": 37,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
chat-completions.js
import OpenAI from "openai";
// Client for the Gravix Layer inference endpoint; the API key is read from
// the GRAVIXLAYER_API_KEY environment variable.
const openai = new OpenAI({
  apiKey: process.env.GRAVIXLAYER_API_KEY,
  baseURL: "https://api.gravixlayer.com/v1/inference",
});

/**
 * Sends a single-turn chat request and prints the full response as
 * pretty-printed JSON.
 *
 * @returns {Promise<void>} Settles once the response has been logged;
 *   rejects if the request fails.
 */
async function ChatCompletion() {
  const completion = await openai.chat.completions.create({
    messages: [{ role: "user", content: "Hello" }],
    model: "llama3.1:8b-instruct-fp16",
  });
  console.log(JSON.stringify(completion, null, 2));
}

// Handle rejections explicitly so a failed request is reported and the
// process exits with a non-zero status instead of leaving the promise floating.
ChatCompletion().catch((error) => {
  console.error("Chat completion request failed:", error);
  process.exitCode = 1;
});
Response:
{
"id": "chatcmpl-798",
"object": "chat.completion",
"created": 1752096591,
"model": "llama3.1:8b-instruct-fp16",
"system_fingerprint": "fp_ollama",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "How can I assist you today?"
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 11,
"completion_tokens": 8,
"total_tokens": 19
}
}