Test Your Deployment
Use your deployment for inference:
- CLI
- Python SDK
- JavaScript SDK
Copy
gravixlayer chat --model "test_model" --user "Hello, how are you?"
Copy
Hello! I'm doing well, thank you for asking. How can I assist you today?
Copy
from gravixlayer import GravixLayer

# The client reads GRAVIXLAYER_API_KEY from the environment by default,
# so no explicit key handling is needed here.
client = GravixLayer()

# Test your deployment with a simple chat
response = client.chat.completions.create(
    model="test_model",
    messages=[
        {"role": "user", "content": "Hello, how are you?"}
    ]
)
print(response.choices[0].message.content)
Copy
import { GravixLayer } from 'gravixlayer';

// The API key is passed in explicitly from the environment.
const client = new GravixLayer({
  apiKey: process.env.GRAVIXLAYER_API_KEY,
});

// Test your deployment with a simple chat
const reply = await client.chat.completions.create({
  model: "test_model",
  messages: [{ role: "user", content: "Hello, how are you?" }],
});
console.log(reply.choices[0].message.content);
Chat Completions
Once your deployment is running, you can use it like any other model by referencing the deployment name:
- CLI
- Python SDK
- JavaScript SDK
Examples: Basic Chat, Streaming Chat, With System Message, Text Completion Mode, Streaming Completion
Copy
gravixlayer chat --model "test_model" --user "Hello, how are you?"
Copy
gravixlayer chat --model "test_model" --user "Tell me a story" --stream
Copy
gravixlayer chat --model "test_model" --system "You are a helpful assistant" --user "Explain quantum computing"
Copy
gravixlayer chat --mode completions --model "test_model" --prompt "The future of AI is"
Copy
gravixlayer chat --mode completions --model "test_model" --prompt "Write a poem about" --stream
Copy
from gravixlayer import GravixLayer

# The client reads GRAVIXLAYER_API_KEY from the environment by default.
client = GravixLayer()

# Basic chat completion
response = client.chat.completions.create(
    model="test_model",
    messages=[
        {"role": "user", "content": "Hello, how are you?"}
    ]
)
print(response.choices[0].message.content)

# Streaming chat: iterate over chunks and print deltas as they arrive
stream = client.chat.completions.create(
    model="test_model",
    messages=[
        {"role": "user", "content": "Tell me a story"}
    ],
    stream=True
)
for chunk in stream:
    # The final stream chunk carries no content delta, so guard against None
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")

# With system message
response = client.chat.completions.create(
    model="test_model",
    messages=[
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "Explain quantum computing"}
    ]
)
print(response.choices[0].message.content)

# Text completion mode (mirrors the CLI `--mode completions` example)
completion = client.completions.create(
    model="test_model",
    prompt="The future of AI is",
    max_tokens=100
)
print(completion.choices[0].text)
Copy
import { GravixLayer } from 'gravixlayer';

const client = new GravixLayer({
  apiKey: process.env.GRAVIXLAYER_API_KEY,
});

// Basic chat completion
const response = await client.chat.completions.create({
  model: "test_model",
  messages: [
    {"role": "user", "content": "Hello, how are you?"}
  ]
});
console.log(response.choices[0].message.content);

// Streaming chat
const stream = await client.chat.completions.create({
  model: "test_model",
  messages: [
    {"role": "user", "content": "Tell me a story"}
  ],
  stream: true
});
for await (const chunk of stream) {
  // `!= null` filters both null and undefined — the final stream chunk
  // has no content delta, and process.stdout.write(undefined) throws.
  if (chunk.choices[0].delta.content != null) {
    process.stdout.write(chunk.choices[0].delta.content);
  }
}

// With system message
const systemResponse = await client.chat.completions.create({
  model: "test_model",
  messages: [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Explain quantum computing"}
  ]
});
console.log(systemResponse.choices[0].message.content);

// Text completion mode
const completion = await client.completions.create({
  model: "test_model",
  prompt: "The future of AI is",
  max_tokens: 100  // snake_case, consistent with every other example here
});
console.log(completion.choices[0].text);
Advanced Usage Examples
Batch Processing
- Python SDK
- JavaScript SDK
Copy
from gravixlayer import GravixLayer

# The client reads GRAVIXLAYER_API_KEY from the environment by default.
client = GravixLayer()

# Process multiple prompts with your deployment
prompts = [
    "Explain machine learning",
    "What is artificial intelligence?",
    "How do neural networks work?",
    "Describe deep learning"
]

# Collect prompt/response pairs so they can be reported together afterwards
responses = []
for prompt in prompts:
    response = client.chat.completions.create(
        model="test_model",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=100
    )
    responses.append({
        "prompt": prompt,
        "response": response.choices[0].message.content
    })

for item in responses:
    print(f"Q: {item['prompt']}")
    print(f"A: {item['response']}")
    print("---")
Copy
import { GravixLayer } from 'gravixlayer';

const client = new GravixLayer({
  apiKey: process.env.GRAVIXLAYER_API_KEY,
});

// Process multiple prompts with your deployment
const prompts = [
  "Explain machine learning",
  "What is artificial intelligence?",
  "How do neural networks work?",
  "Describe deep learning"
];

// Run the prompts sequentially and collect prompt/answer pairs.
const responses = [];
for (const prompt of prompts) {
  const result = await client.chat.completions.create({
    model: "test_model",
    messages: [{ role: "user", content: prompt }],
    max_tokens: 100
  });
  responses.push({ prompt, response: result.choices[0].message.content });
}

// Report each pair.
for (const { prompt, response } of responses) {
  console.log(`Q: ${prompt}`);
  console.log(`A: ${response}`);
  console.log("---");
}
Performance Monitoring
- Python SDK
- JavaScript SDK
Copy
import time

from gravixlayer import GravixLayer

client = GravixLayer()

def benchmark_deployment(model_name, test_prompts, iterations=3):
    """Benchmark a deployment's chat-completion latency.

    Args:
        model_name: Deployment/model name to benchmark.
        test_prompts: List of user prompts to time.
        iterations: Number of timed calls per prompt (default 3).

    Returns:
        A list of dicts, one per prompt, each with the prompt text, the
        average response time in milliseconds, and the per-iteration
        measurements.
    """
    results = []
    for prompt in test_prompts:
        prompt_results = []
        for i in range(iterations):
            # perf_counter is monotonic and high-resolution; time.time()
            # is wall-clock and can jump (NTP), skewing measurements.
            start_time = time.perf_counter()
            response = client.chat.completions.create(
                model=model_name,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=50
            )
            response_time = (time.perf_counter() - start_time) * 1000  # ms
            prompt_results.append({
                "iteration": i + 1,
                "response_time_ms": response_time,
                "response_length": len(response.choices[0].message.content)
            })
        avg_time = sum(r["response_time_ms"] for r in prompt_results) / len(prompt_results)
        results.append({
            "prompt": prompt,
            "avg_response_time_ms": avg_time,
            "iterations": prompt_results
        })
    return results

# Benchmark your deployment
test_prompts = [
    "Hello, how are you?",
    "Explain AI in one sentence",
    "What is the weather like?"
]

benchmark_results = benchmark_deployment("test_model", test_prompts)
for result in benchmark_results:
    print(f"Prompt: {result['prompt']}")
    print(f"Average response time: {result['avg_response_time_ms']:.2f}ms")
    print("---")
Copy
import { GravixLayer } from 'gravixlayer';

const client = new GravixLayer({
  apiKey: process.env.GRAVIXLAYER_API_KEY,
});

/**
 * Benchmark a deployment's chat-completion latency.
 *
 * @param {string} modelName - Deployment/model name to benchmark.
 * @param {string[]} testPrompts - User prompts to time.
 * @param {number} [iterations=3] - Timed calls per prompt.
 * @returns {Promise<Array>} One entry per prompt with the average
 *   response time in milliseconds and the per-iteration measurements.
 */
async function benchmarkDeployment(modelName, testPrompts, iterations = 3) {
  const results = [];
  for (const prompt of testPrompts) {
    const promptResults = [];
    for (let i = 0; i < iterations; i++) {
      // performance.now() is monotonic and sub-millisecond; Date.now()
      // is wall-clock and can jump, skewing measurements.
      const startTime = performance.now();
      const response = await client.chat.completions.create({
        model: modelName,
        messages: [{"role": "user", "content": prompt}],
        max_tokens: 50
      });
      const responseTime = performance.now() - startTime;
      promptResults.push({
        iteration: i + 1,
        response_time_ms: responseTime,
        response_length: response.choices[0].message.content.length
      });
    }
    const avgTime = promptResults.reduce((sum, r) => sum + r.response_time_ms, 0) / promptResults.length;
    results.push({
      prompt: prompt,
      avg_response_time_ms: avgTime,
      iterations: promptResults
    });
  }
  return results;
}

// Benchmark your deployment
const testPrompts = [
  "Hello, how are you?",
  "Explain AI in one sentence",
  "What is the weather like?"
];

const benchmarkResults = await benchmarkDeployment("test_model", testPrompts);
benchmarkResults.forEach(result => {
  console.log(`Prompt: ${result.prompt}`);
  console.log(`Average response time: ${result.avg_response_time_ms.toFixed(2)}ms`);
  console.log("---");
});
Troubleshooting
Common Issues
Deployment Stuck in “Creating” Status:
- Wait 5-10 minutes for initialization
- Check hardware availability with `gravixlayer deployments gpu --list`
- Verify model name is correct
- Ensure deployment status is “running” before making requests
- Verify deployment name matches exactly
- Check API key configuration
- Monitor deployment status and resource usage
- Consider scaling up replicas for higher throughput
- Check if model size matches your use case

