From 3df41c19cd2fc0acc189523ade9709caecc271fc Mon Sep 17 00:00:00 2001
From: Brando Miranda
Date: Mon, 29 Apr 2024 12:53:54 -0700
Subject: [PATCH] all worked, added pytorch and vllm test

---
 README.md    | 36 ++++++++++++++++++++++++++++++---
 test_vllm.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 3 deletions(-)
 create mode 100644 test_vllm.py

diff --git a/README.md b/README.md
index c0e3cc1..f07927c 100644
--- a/README.md
+++ b/README.md
@@ -33,22 +33,52 @@ The tests in `pantograph/server.py` also serve as simple interaction examples
 # install Lean4 manually (elan and lake)
 curl -sSf https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh | sh -s -- -y

+# make sure the Lean 4 tools are available (restart the shell, then run elan and lake to confirm)
+export PATH="$HOME/.elan/bin:$PATH"
+echo 'export PATH="$HOME/.elan/bin:$PATH"' >> ~/.bashrc
+bash
+elan
+lake
+
 # create and activate the right python env (this is needed so that poetry build works)
 conda create -n pypantograph_env python=3.11 -y
 conda activate pypantograph_env

+# install poetry in its own python venv (a separate install so poetry's deps and your project's deps don't clash)
+mkdir $HOME/.virtualenvs
+
+export VENV_PATH=$HOME/.virtualenvs/venv_for_poetry
+export PATH="$VENV_PATH/bin:$PATH"
+
+echo 'export VENV_PATH=$HOME/.virtualenvs/venv_for_poetry' >> ~/.bashrc
+echo 'export PATH="$VENV_PATH/bin:$PATH"' >> ~/.bashrc
+bash
+
+python3 -m venv $VENV_PATH
+$VENV_PATH/bin/pip install -U pip setuptools
+$VENV_PATH/bin/pip install poetry
+
+poetry
+
 # build the PyPantograph proj (build the py distribution, py deps and custom (lean4) installs)
+ln -s $AFS/PyPantograph $HOME/PyPantograph
 cd $HOME/PyPantograph
 poetry build

-# install pypantograph in editable mode (only pyproject.toml needed! Assuming your at the proj root)
+# install pypantograph in editable mode (only pyproject.toml (or setup.py) is needed; assumes you're at the proj root)
 pip install -e .

 # confirm installs
 pip list | grep pantograph
 pip list | grep vllm
-pip list | greo torch
+pip list | grep torch
+
+# select the GPU with the least vRAM in use
+export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.used --format=csv,nounits,noheader | awk '{print NR-1 " " $1}' | sort -nk2 | head -n1 | cut -d' ' -f1)
+echo $CUDA_VISIBLE_DEVICES

 # make sure the PyPantograph server tests by Leni work
-python -m server.py
+# python -m server.py
+python $HOME/PyPantograph/pantograph/server.py
+python $HOME/PyPantograph/test_vllm.py
 ```
diff --git a/test_vllm.py b/test_vllm.py
new file mode 100644
index 0000000..b3d1bfb
--- /dev/null
+++ b/test_vllm.py
@@ -0,0 +1,56 @@
+# adapted from https://docs.vllm.ai/en/latest/getting_started/quickstart.html
+
+# to pull models from ModelScope instead of Hugging Face: export VLLM_USE_MODELSCOPE=True
+import vllm
+from vllm import LLM, SamplingParams
+
+import torch
+
+def test_pytorch():
+    print('\n----- Test PyTorch ---')
+    # Print the PyTorch version and CUDA version
+    print(f"PyTorch version: {torch.__version__}")
+    print(f"CUDA version: {torch.version.cuda}")
+
+    # Perform a matrix multiplication on CUDA and print the result
+    result = torch.randn(2, 4).cuda() @ torch.randn(4, 1).cuda()
+    print(f"Matrix multiplication result: {result}")
+
+    # Check CUDA availability and device details
+    print(f'Number of CUDA devices: {torch.cuda.device_count()}')
+    if torch.cuda.device_count() > 0:
+        print(f'Device name: {torch.cuda.get_device_name(0)}')
+    else:
+        print("No CUDA devices available.")
+
+def test_vllm():
+    print('\n----- Test vLLM ---')
+    prompts = [
+        "Hello, my name is",
+        "The president of the United States is",
+        "The capital of France is",
+        "The future of AI is",
+    ]
+    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+
+    llm = LLM(model="facebook/opt-125m")
+    # llm = LLM(model="mistralai/Mistral-7B-Instruct-v0.1")
+
+    outputs: list = llm.generate(prompts, sampling_params)
+    print(f'{type(outputs)=}')
+    print(f'{type(outputs[0])=}')
+
+    # Print the outputs.
+    output: vllm.outputs.RequestOutput
+    for output in outputs:
+        prompt: str = output.prompt
+        generated_text: str = output.outputs[0].text
+        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+
+if __name__ == "__main__":
+    import time
+    start_time = time.time()
+    test_pytorch()
+    test_vllm()
+    elapsed = time.time() - start_time
+    print(f"Time taken: {elapsed:.2f} seconds, or {elapsed / 60:.2f} minutes, or {elapsed / 3600:.2f} hours.\a")
\ No newline at end of file
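
A note on the `CUDA_VISIBLE_DEVICES` one-liner in the README hunk: it lists per-GPU used vRAM via `nvidia-smi`, sorts ascending, and exports the index of the least-loaded device. For readers who prefer Python, here is a minimal sketch of the same idea using `torch.cuda.mem_get_info` (which returns a `(free_bytes, total_bytes)` pair per device); `pick_freest_gpu` is an illustrative name, not part of PyPantograph or the patch:

```python
# Sketch: pick the CUDA device with the most free vRAM.
# Caveat: torch.cuda.mem_get_info initializes CUDA in this process, so in a
# real workflow you export CUDA_VISIBLE_DEVICES in the shell *before*
# launching the job, exactly as the README's one-liner does.
import torch

def pick_freest_gpu() -> int:
    """Return the index of the visible CUDA device with the most free vRAM."""
    assert torch.cuda.is_available(), "No CUDA devices available."
    free_bytes = [
        torch.cuda.mem_get_info(i)[0]  # (free, total) per device; keep free
        for i in range(torch.cuda.device_count())
    ]
    return max(range(len(free_bytes)), key=free_bytes.__getitem__)

if __name__ == "__main__":
    print(f"GPU with most free vRAM: {pick_freest_gpu()}")
```

The two criteria differ slightly: the shell one-liner minimizes *used* memory while this sketch maximizes *free* memory; they rank devices identically whenever all GPUs have the same total vRAM.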
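The final README step runs `pantograph/server.py` directly, which executes its built-in tests. For a quick interactive smoke test beyond that, something along these lines should work; the `Server`, `goal_start`, and `goal_tactic` names follow PyPantograph's example snippets and should be treated as assumptions rather than a verified API (check `pantograph/server.py` for the authoritative interface):

```python
# Hedged smoke test: the names below (Server, goal_start, goal_tactic) are
# assumptions based on PyPantograph's examples, not guaranteed signatures.
from pantograph import Server

server = Server()

# Start a proof goal and apply one tactic to it.
state0 = server.goal_start("forall (p q: Prop), Or p q -> Or q p")
print(state0)  # expect one open goal

state1 = server.goal_tactic(state0, goal_id=0, tactic="intro p q h")
print(state1)  # goal state after the intro
```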