{
"model": {
"instant_provisioned": true,
"created_at": "2022-01-01T00:00:00Z",
"screen_name": "Llama 3.1 70B",
"model_name": "meta-llama--Meta-Llama-3.1-70B-Instruct",
"hf_name": "meta-llama/Meta-Llama-3.1-70B-Instruct",
"license": "https://llama.meta.com/llama3/license/",
"size": 70600000000,
"context_length": 131072,
"creator": {
"name": "Meta",
"url": "https://ai.meta.com"
},
"quantization": "fp8",
"description": "The Llama 3.1 instruction tuned text only models...",
"tags": [
"Instruct"
],
"recommended_prompt": "<s>[INST] {{ prompt }}[/INST]",
"prompt_example": "<s>[INST] Tell me a joke.[/INST]",
"bits": "16",
"required_disk_size": 141.11,
"ignore_patterns": [
"consolidated.safetensors"
],
"recommended_variant": true,
"variants": {
"FP8": "neuralmagic--Meta-Llama-3.1-8B-Instruct-FP8",
"Original": "meta-llama--Meta-Llama-3.1-8B-Instruct"
},
"required_VRAM_GB": 141.11,
"recommended_config": "NVIDIA_H100_2",
"hardware_configs": [
{
"params": {
"max_context_length": 71400,
"gpu_util": 0.95,
"gpu_count": 1,
"enforce_eager": false,
"dtype": "auto"
},
"requirements": {
"vllm": "0.6.3"
}
}
]
}
}