diff --git a/README.md b/README.md
index c0e3cc1..f07927c 100644
--- a/README.md
+++ b/README.md
@@ -33,22 +33,52 @@ The tests in `pantograph/server.py` also serve as simple interaction examples
 # install Lean4 manually (elan and lake)
 curl -sSf https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh | sh -s -- -y
 
+# make sure Lean4 tools (lean, lake) are available
+export PATH="$HOME/.elan/bin:$PATH"
+echo 'export PATH="$HOME/.elan/bin:$PATH"' >> ~/.bashrc
+bash
+elan
+lake
+
 # create and activate the right python env (this is needed so that poetry build works)
 conda create -n pypantograph_env python=3.11 -y
 conda activate pypantograph_env
 
+# install poetry in its own python venv (needs a separate install so poetry's deps & your project's deps don't conflict)
+mkdir -p $HOME/.virtualenvs
+
+export VENV_PATH=$HOME/.virtualenvs/venv_for_poetry
+export PATH="$VENV_PATH/bin:$PATH"
+
+echo 'export VENV_PATH=$HOME/.virtualenvs/venv_for_poetry' >> ~/.bashrc
+echo 'export PATH="$VENV_PATH/bin:$PATH"' >> ~/.bashrc
+bash
+
+python3 -m venv $VENV_PATH
+$VENV_PATH/bin/pip install -U pip setuptools
+$VENV_PATH/bin/pip install poetry
+
+poetry
+
 # build the PyPantograph proj (build the py distribution, py deps and custom (lean4) installs)
+ln -s $AFS/PyPantograph $HOME/PyPantograph
 cd $HOME/PyPantograph
 poetry build
 
-# install pypantograph in editable mode (only pyproject.toml needed! Assuming your at the proj root)
+# install pypantograph in editable mode (only pyproject.toml (or setup.py!) needed! Assuming you're at the proj root)
 pip install -e .
 
 # confirm intalls
 pip list | grep pantograph
 pip list | grep vllm
-pip list | greo torch
+pip list | grep torch
+
+# select the GPU with the least vRAM in use
+export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.used --format=csv,nounits,noheader | awk '{print NR-1 " " $1}' | sort -nk2 | head -n1 | cut -d' ' -f1)
+echo $CUDA_VISIBLE_DEVICES
 
 # make sure the PyPantrograph server tests by Leni work
-python -m server.py
+# python -m server.py
+python $HOME/PyPantograph/pantograph/server.py
+python $HOME/PyPantograph/test_vllm.py
 ```
diff --git a/test_vllm.py b/test_vllm.py
new file mode 100644
index 0000000..b3d1bfb
--- /dev/null
+++ b/test_vllm.py
@@ -0,0 +1,64 @@
+# Smoke test for the PyTorch + vLLM install.
+# Adapted from https://docs.vllm.ai/en/latest/getting_started/quickstart.html
+
+# do export VLLM_USE_MODELSCOPE=True
+import vllm
+from vllm import LLM, SamplingParams
+
+import torch
+
+
+def test_pytorch():
+    """Print PyTorch/CUDA versions and run a small matmul on the GPU."""
+    print('\n----- Test PyTorch ---')
+    # Print the PyTorch version and CUDA version
+    print(f"PyTorch version: {torch.__version__}")
+    print(f"CUDA version: {torch.version.cuda}")
+
+    # Check CUDA availability before touching the GPU: calling .cuda() with no
+    # visible device raises, which would hide the diagnostic message below.
+    num_devices = torch.cuda.device_count()
+    print(f'Number of CUDA devices: {num_devices}')
+    if num_devices == 0:
+        print("No CUDA devices available.")
+        return
+    print(f'Device name: {torch.cuda.get_device_name(0)}')
+
+    # Perform a matrix multiplication on CUDA and print the result
+    result = torch.randn(2, 4).cuda() @ torch.randn(4, 1).cuda()
+    print(f"Matrix multiplication result: {result}")
+
+
+def test_vllm():
+    """Generate completions for a few prompts with a small model and print them."""
+    print('\n----- Test vLLM ---')
+    prompts = [
+        "Hello, my name is",
+        "The president of the United States is",
+        "The capital of France is",
+        "The future of AI is",
+    ]
+    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+
+    llm = LLM(model="facebook/opt-125m")
+    # llm = LLM(model="mistralai/Mistral-7B-Instruct-v0.1")
+
+    outputs: list[vllm.outputs.RequestOutput] = llm.generate(prompts, sampling_params)
+    print(f'{type(outputs)=}')
+    print(f'{type(outputs[0])=}')
+
+    # Print the outputs.
+    for output in outputs:
+        prompt: str = output.prompt
+        generated_text: str = output.outputs[0].text
+        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+
+
+if __name__ == "__main__":
+    import time
+    start_time = time.time()
+    test_pytorch()
+    test_vllm()
+    # Measure once so the three figures describe the same elapsed time.
+    elapsed = time.time() - start_time
+    print(f"Time taken: {elapsed:.2f} seconds, or {elapsed / 60:.2f} minutes, or {elapsed / 3600:.2f} hours.\a")