vllm 0.6.1.post2
vllm 0.6.1.post2
Published
A high-throughput and memory-efficient inference and serving engine for LLMs
pip install vllm
Package Downloads
Authors
Project URLs
Requires Python
>=3.8
Dependencies
- psutil
- sentencepiece
- numpy
<2.0.0
- requests
- tqdm
- py-cpuinfo
- transformers
>=4.43.2
- tokenizers
>=0.19.1
- protobuf
- aiohttp
- openai
>=1.40.0
- uvicorn
[standard]
- pydantic
>=2.9
- pillow
- prometheus-client
>=0.18.0
- prometheus-fastapi-instrumentator
>=7.0.0
- tiktoken
>=0.6.0
- lm-format-enforcer
==0.10.6
- outlines
<0.1,>=0.0.43
- typing-extensions
>=4.10
- filelock
>=3.10.4
- partial-json-parser
- pyzmq
- msgspec
- gguf
==0.9.1
- importlib-metadata
- mistral-common
>=1.4.0
- pyyaml
- einops
- ray
>=2.9
- nvidia-ml-py
- torch
==2.4.0
- torchvision
==0.19
- xformers
==0.0.27.post2; platform_system == "Linux" and platform_machine == "x86_64"
- vllm-flash-attn
==2.6.1; platform_system == "Linux" and platform_machine == "x86_64"
- fastapi
<0.113.0; python_version < "3.9"
- six
>=1.16.0; python_version > "3.11"
- fastapi
>=0.114.1; python_version >= "3.9"
- librosa
; extra == "audio"
- soundfile
; extra == "audio"
- tensorizer
>=2.9.0; extra == "tensorizer"
- opencv-python
; extra == "video"