Tutorial

This tutorial demonstrates how to use the GravixLayer Python SDK with simple, practical examples.

1. Setup

First, install the GravixLayer Python SDK:

pip install gravixlayer

Set your API key as an environment variable:

export GRAVIXLAYER_API_KEY="your_api_key_here"
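
Optionally, confirm the key is visible to Python before making any requests (a quick sanity check, not required by the SDK):

import os

# Optional sanity check: the SDK reads GRAVIXLAYER_API_KEY from the
# environment, so make sure it is actually set before proceeding.
if not os.environ.get("GRAVIXLAYER_API_KEY"):
    raise SystemExit("GRAVIXLAYER_API_KEY is not set")
print("API key found.")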

2. Your First Request

Create a new file called main.py:

import os
from gravixlayer import GravixLayer

client = GravixLayer()

completion = client.chat.completions.create(
    model="meta-llama/llama-3.1-8b-instruct",
    messages=[{"role": "user", "content": "Hello, world!"}]
)

print(completion.choices[0].message.content)

Run the script:

python main.py

You should see a response from the model!
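
Network calls can fail, so it is worth adding error handling early on. The SDK's specific exception classes are not covered in this tutorial, so this sketch catches the generic Exception; swap in the SDK's own exception types if you know them:

from gravixlayer import GravixLayer

client = GravixLayer()

try:
    completion = client.chat.completions.create(
        model="meta-llama/llama-3.1-8b-instruct",
        messages=[{"role": "user", "content": "Hello, world!"}]
    )
    print(completion.choices[0].message.content)
except Exception as exc:  # replace with the SDK's specific exceptions if available
    print(f"Request failed: {exc}")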

3. Chat Completions

Simple conversation with the AI:

from gravixlayer import GravixLayer

client = GravixLayer()

response = client.chat.completions.create(
    model="meta-llama/llama-3.1-8b-instruct",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is Python?"}
    ]
)

print(response.choices[0].message.content)
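
Chat models are stateless between calls, so multi-turn conversations work by resending the full history. A sketch of the pattern, reusing the client from above (the chatbot section later formalizes this):

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is Python?"}
]

first = client.chat.completions.create(
    model="meta-llama/llama-3.1-8b-instruct",
    messages=messages
)

# Append the assistant's reply, then ask a follow-up in the same conversation
messages.append({"role": "assistant", "content": first.choices[0].message.content})
messages.append({"role": "user", "content": "Show me a one-line example."})

second = client.chat.completions.create(
    model="meta-llama/llama-3.1-8b-instruct",
    messages=messages
)
print(second.choices[0].message.content)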

4. Text Completions

Simple text completion from a prompt:

from gravixlayer import GravixLayer

client = GravixLayer()

completion = client.completions.create(
    model="meta-llama/llama-3.1-8b-instruct",
    prompt="The future of artificial intelligence is",
    max_tokens=50
)

print(completion.choices[0].text.strip())
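
Sampling parameters shape the output. The chatbot example later in this tutorial passes temperature to chat completions; assuming the completions endpoint accepts it as well (standard in OpenAI-style APIs), a lower value makes the continuation more deterministic:

# Lower temperature -> more deterministic continuation (temperature support
# on this endpoint is assumed, as in other OpenAI-style completions APIs)
completion = client.completions.create(
    model="meta-llama/llama-3.1-8b-instruct",
    prompt="The future of artificial intelligence is",
    max_tokens=50,
    temperature=0.2
)
print(completion.choices[0].text.strip())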

5. Streaming Responses

Get responses in real-time:

from gravixlayer import GravixLayer

client = GravixLayer()

# Chat streaming
stream = client.chat.completions.create(
    model="meta-llama/llama-3.1-8b-instruct",
    messages=[{"role": "user", "content": "Tell me a short story"}],
    stream=True
)

for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()  # New line

# Completions streaming
stream = client.completions.create(
    model="meta-llama/llama-3.1-8b-instruct",
    prompt="Write a poem about",
    max_tokens=100,
    stream=True
)

for chunk in stream:
    if chunk.choices[0].text is not None:
        print(chunk.choices[0].text, end="", flush=True)
print()  # New line
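
When streaming, you often want the complete text afterwards as well as the live output. A small helper built on the same chat-streaming call, reusing the client from above, can collect the chunks while printing them:

def stream_chat(prompt):
    """Stream a chat response to stdout and return the full text."""
    stream = client.chat.completions.create(
        model="meta-llama/llama-3.1-8b-instruct",
        messages=[{"role": "user", "content": prompt}],
        stream=True
    )
    parts = []
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            parts.append(delta)
            print(delta, end="", flush=True)
    print()
    return "".join(parts)

story = stream_chat("Tell me a short story")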

6. Embeddings

Generate text embeddings:

import os
import json
from gravixlayer import GravixLayer

client = GravixLayer(
    api_key=os.environ.get("GRAVIXLAYER_API_KEY"),
)

embedding = client.embeddings.create(
    model="meta-llama/llama-3.1-8b-instruct",
    input="Why is the sky blue?",
)

print(json.dumps(embedding.model_dump(), indent=2))
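
Embeddings become useful when you compare them. Below is a minimal cosine-similarity sketch using only the standard library; it reuses the client from above and assumes the response follows the OpenAI-compatible shape, with the vector at embedding.data[0].embedding:

import math

def embed(text):
    # Assumes an OpenAI-compatible response shape: data[0].embedding
    response = client.embeddings.create(
        model="meta-llama/llama-3.1-8b-instruct",
        input=text,
    )
    return response.data[0].embedding

def cosine_similarity(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    return dot / (norm_a * norm_b)

v1 = embed("Why is the sky blue?")
v2 = embed("What causes the sky's color?")
print(f"Similarity: {cosine_similarity(v1, v2):.3f}")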

7. Function Calling

Let AI call your functions:

import os
import json
import requests
from gravixlayer import GravixLayer

# Define a simple function
def get_weather(latitude, longitude):
    """Get current temperature for coordinates."""
    url = f"https://api.open-meteo.com/v1/forecast?latitude={latitude}&longitude={longitude}&current=temperature_2m"
    response = requests.get(url)
    data = response.json()
    return data['current']['temperature_2m']

client = GravixLayer()

# Define the function for the AI
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current temperature for a location",
        "parameters": {
            "type": "object",
            "properties": {
                "latitude": {"type": "number"},
                "longitude": {"type": "number"}
            },
            "required": ["latitude", "longitude"]
        }
    }
}]

# Ask AI to call the function
messages = [{"role": "user", "content": "What's the weather in Paris?"}]

completion = client.chat.completions.create(
    model="meta-llama/llama-3.1-8b-instruct",
    messages=messages,
    tools=tools,
    tool_choice="auto"
)

response_message = completion.choices[0].message
messages.append(response_message)

# Check if AI wants to call function
if response_message.tool_calls:
    tool_call = response_message.tool_calls[0]

    # Call the function
    args = json.loads(tool_call.function.arguments)
    result = get_weather(args["latitude"], args["longitude"])

    # Send result back to AI
    messages.append({
        "role": "tool",
        "tool_call_id": tool_call.id,
        "name": "get_weather",
        "content": str(result)
    })

    # Get final response
    final_completion = client.chat.completions.create(
        model="meta-llama/llama-3.1-8b-instruct",
        messages=messages,
        tools=tools
    )

    print(final_completion.choices[0].message.content)
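
A model may request several tool calls in a single turn. This sketch generalizes the block above by looping over every entry in tool_calls; it reuses get_weather, messages, and response_message from the example:

# Handle every requested tool call, not just the first
if response_message.tool_calls:
    for tool_call in response_message.tool_calls:
        args = json.loads(tool_call.function.arguments)
        result = get_weather(args["latitude"], args["longitude"])
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "name": tool_call.function.name,
            "content": str(result)
        })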

8. Building a Simple Chatbot

Create an interactive chatbot with conversation memory:

from gravixlayer import GravixLayer

class SimpleChatbot:
    def __init__(self):
        self.client = GravixLayer()
        self.conversation = []

    def chat(self, message):
        # Add user message to conversation
        self.conversation.append({"role": "user", "content": message})

        # Create messages with system prompt
        messages = [
            {"role": "system", "content": "You are a helpful assistant."}
        ] + self.conversation

        # Get response from the model
        completion = self.client.chat.completions.create(
            model="meta-llama/llama-3.1-8b-instruct",
            messages=messages,
            temperature=0.7,
            max_tokens=150
        )

        # Add assistant response to conversation
        assistant_message = completion.choices[0].message.content
        self.conversation.append({"role": "assistant", "content": assistant_message})

        return assistant_message

# Use the chatbot
bot = SimpleChatbot()
print(bot.chat("What is Python programming?"))
print(bot.chat("Can you give me an example?"))

9. Async Operations

Use the async client to run requests concurrently:

import asyncio
from gravixlayer import AsyncGravixLayer

class AsyncChatbot:
    def __init__(self):
        self.client = AsyncGravixLayer()

    async def chat(self, message):
        completion = await self.client.chat.completions.create(
            model="meta-llama/llama-3.1-8b-instruct",
            messages=[{"role": "user", "content": message}],
            temperature=0.7
        )
        return completion.choices[0].message.content

    async def chat_multiple(self, messages):
        """Handle multiple messages concurrently."""
        tasks = [self.chat(msg) for msg in messages]
        return await asyncio.gather(*tasks)

async def main():
    bot = AsyncChatbot()

    # Single request
    response = await bot.chat("What is Python?")
    print(f"Single: {response}")

    # Multiple concurrent requests
    messages = [
        "What is machine learning?",
        "Explain neural networks",
        "What is deep learning?"
    ]

    responses = await bot.chat_multiple(messages)
    for i, response in enumerate(responses):
        print(f"Response {i+1}: {response[:100]}...")

asyncio.run(main())
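
asyncio.gather fires all requests at once, which may hit rate limits for large batches. A sketch that caps concurrency with asyncio.Semaphore, reusing the AsyncChatbot class from above:

async def chat_limited(bot, prompts, max_concurrent=2):
    """Run many chats, at most max_concurrent at a time."""
    semaphore = asyncio.Semaphore(max_concurrent)

    async def one(prompt):
        async with semaphore:
            return await bot.chat(prompt)

    return await asyncio.gather(*(one(p) for p in prompts))

# Usage inside an async function:
# responses = await chat_limited(bot, messages, max_concurrent=2)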

10. CLI Usage

Use the command line interface:

# Chat completion
gravixlayer --model "meta-llama/llama-3.1-8b-instruct" --user "Hello!"

# Text completion
gravixlayer --mode completions --model "meta-llama/llama-3.1-8b-instruct" --prompt "The future of AI is"

# Chat with system message
gravixlayer --model "meta-llama/llama-3.1-8b-instruct" --system "You are a helpful assistant" --user "Explain AI"

# Streaming chat
gravixlayer --model "meta-llama/llama-3.1-8b-instruct" --user "Tell a story" --stream

# Streaming completion
gravixlayer --mode completions --model "meta-llama/llama-3.1-8b-instruct" --prompt "Write a poem" --stream

11. Deployment Management

Manage dedicated model deployments using the Python SDK:

import os
from gravixlayer import GravixLayer

# Initialize the client
client = GravixLayer(api_key=os.environ.get("GRAVIXLAYER_API_KEY"))

# Create a deployment
deployment = client.deployments.create(
    deployment_name="custom_model",
    hw_type="dedicated",
    hardware="nvidia-t4-16gb-pcie_1",
    min_replicas=1,
    model_name="qwen3-1.7b"
)
print(f"Created deployment: {deployment.id}")

# List all deployments
deployments = client.deployments.list()
for deployment in deployments:
    print(f"Deployment: {deployment.name} - Status: {deployment.status}")

# Delete a deployment
client.deployments.delete(deployment_id="your_deployment_id")

# List available hardware
hardware_options = client.deployments.list_hardware()
for hardware in hardware_options:
    print(f"Hardware: {hardware.name} - Memory: {hardware.memory}")

# Get hardware as JSON
hardware_json = client.deployments.list_hardware(format="json")
print(hardware_json)
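
Deployments take time to provision. Below is a hedged sketch that polls the list endpoint until a deployment reports a ready status; the target status string is an assumption, so check the Dedicated Deployments documentation for the actual values:

import time

def wait_for_deployment(client, name, target_status="running", timeout=600):
    """Poll until the named deployment reaches target_status (assumed value)."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        for d in client.deployments.list():
            if d.name == name:
                print(f"{d.name}: {d.status}")
                if d.status == target_status:
                    return d
        time.sleep(10)
    raise TimeoutError(f"Deployment {name} did not reach {target_status} in {timeout}s")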

Deployment Benefits

Dedicated deployments provide:

  • Guaranteed capacity with no cold starts
  • Consistent performance and low latency
  • Isolated resources for enterprise workloads
  • Custom scaling policies and configurations

Conclusion

This tutorial covered the core features of the GravixLayer Python SDK:

  • Chat Completions - Conversational AI
  • Text Completions - Prompt-based text generation
  • Embeddings - Text similarity and search
  • Streaming - Real-time responses
  • Function Calling - AI tool integration
  • Chatbot - Interactive conversation with memory
  • Async Support - High-performance operations
  • Deployment Management - Dedicated model instances
  • CLI Interface - Command-line usage

Each example is simple and can be used as a starting point for your applications. For more advanced features, check the Dedicated Deployments documentation.