From 40b8d89ee7a777cfeedcc3e85f5bbd85594e381f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Limpinho?= <53994778+TomasLimpinho@users.noreply.github.com>
Date: Mon, 8 Dec 2025 16:25:54 +0000
Subject: [PATCH] nvidia in all nodes

---
Review notes (this section is ignored by git am):
- File contents below were revised by hand, so the blob hashes on the
  "index" lines were dropped; regenerate with git format-patch after applying.
- Indentation of the playbook.yml context lines was reconstructed; if the
  hunks do not apply cleanly, retry with git apply --ignore-whitespace.

 playbook.yml                |   2 +
 roles/nvidia/tasks/main.yml | 113 ++++++++++++++++++++++++++++++++++++
 roles/nvidia/vars/main.yml  |   7 +++
 3 files changed, 122 insertions(+)
 create mode 100644 roles/nvidia/tasks/main.yml
 create mode 100644 roles/nvidia/vars/main.yml

diff --git a/playbook.yml b/playbook.yml
--- a/playbook.yml
+++ b/playbook.yml
@@ -6,6 +6,7 @@
     - vaultwarden
     - kubernetes
     - kube-master
+    - nvidia
 
 - name: Configure Kubernetes Nodes
   hosts: workers
@@ -15,7 +16,8 @@
     - vaultwarden
     - kubernetes
     - kube-node
+    - nvidia
 
 - name: Configure Kubernetes Data
   hosts: master
   become: yes
diff --git a/roles/nvidia/tasks/main.yml b/roles/nvidia/tasks/main.yml
new file mode 100644
--- /dev/null
+++ b/roles/nvidia/tasks/main.yml
@@ -0,0 +1,113 @@
+---
+# Installs the NVIDIA driver and the NVIDIA Container Toolkit, then registers
+# the NVIDIA runtime with whichever container engine is installed.
+#
+# apt, systemd, the reboot and writes below /etc and /usr/share all need root,
+# so tasks use plain "become: true" and rely on Ansible's default become user
+# (root) rather than an unprivileged account.
+
+- name: Instalar as nvidia drivers
+  become: true
+  ansible.builtin.apt:
+    name:
+      - nvidia-driver-550
+      - nvidia-utils-550
+    state: present
+    update_cache: true
+  register: nvidia_driver_install
+
+# Reboot only when the driver task changed something, so re-running the role
+# does not restart an already configured node.
+- name: Reboot remoto e continuar
+  become: true
+  ansible.builtin.reboot:
+    # Maximum time to wait for the host to come back, in seconds.
+    reboot_timeout: 600
+    # Command run on the host to confirm that it is back.
+    test_command: whoami
+  when: nvidia_driver_install is changed
+
+# Download into a variable first: with "set -e" a failed curl aborts before
+# gpg creates the keyring, so "creates" never latches onto a bad file.
+- name: Adicionar chave GPG da NVIDIA
+  become: true
+  ansible.builtin.shell:
+    cmd: |
+      set -euo pipefail
+      key="$(curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey)"
+      printf '%s\n' "$key" |
+        gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit.gpg
+    executable: /bin/bash
+    creates: /usr/share/keyrings/nvidia-container-toolkit.gpg
+
+# Same download-then-write pattern; sed pins the repository to the keyring
+# installed by the previous task via "signed-by".
+- name: Adicionar repositório NVIDIA
+  become: true
+  ansible.builtin.shell:
+    cmd: |
+      set -euo pipefail
+      list="$(curl -fsSL https://nvidia.github.io/libnvidia-container/stable/ubuntu22.04/amd64/nvidia-container-toolkit.list)"
+      printf '%s\n' "$list" |
+        sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit.gpg] https://#' \
+        > /etc/apt/sources.list.d/nvidia-container-toolkit.list
+    executable: /bin/bash
+    creates: /etc/apt/sources.list.d/nvidia-container-toolkit.list
+
+- name: Atualizar cache de pacotes
+  become: true
+  ansible.builtin.apt:
+    update_cache: true
+
+- name: Instalar NVIDIA Container Toolkit
+  become: true
+  ansible.builtin.apt:
+    name: nvidia-container-toolkit
+    state: present
+
+# The "when" conditions below read ansible_facts.packages, which is only
+# populated once package_facts has run.
+- name: Recolher factos dos pacotes instalados
+  ansible.builtin.package_facts:
+    manager: auto
+
+- name: Configurar runtime Docker
+  become: true
+  ansible.builtin.command: nvidia-ctk runtime configure --runtime=docker
+  when: "'docker.io' in ansible_facts.packages"
+
+- name: Reiniciar Docker
+  become: true
+  ansible.builtin.systemd:
+    name: docker
+    state: restarted
+  when: "'docker.io' in ansible_facts.packages"
+
+- name: Configurar runtime containerd
+  become: true
+  ansible.builtin.command: nvidia-ctk runtime configure --runtime=containerd
+  when: "'containerd' in ansible_facts.packages"
+
+- name: Reiniciar containerd
+  become: true
+  ansible.builtin.systemd:
+    name: containerd
+    state: restarted
+  when: "'containerd' in ansible_facts.packages"
+
+# Smoke test only: ignore_errors keeps a failed check from aborting the play.
+# CUDA images are tagged with the full version plus an OS suffix.
+- name: Validar GPU dentro de container
+  become: true
+  ansible.builtin.command: >-
+    docker run --rm --runtime=nvidia --gpus all
+    nvidia/cuda:12.2.0-base-ubuntu22.04 nvidia-smi
+  register: nvidia_smi_output
+  changed_when: false
+  ignore_errors: true
+  when: "'docker.io' in ansible_facts.packages"
+
+- name: Mostrar resultado nvidia-smi
+  ansible.builtin.debug:
+    var: nvidia_smi_output.stdout
+  when: nvidia_smi_output is not skipped
diff --git a/roles/nvidia/vars/main.yml b/roles/nvidia/vars/main.yml
new file mode 100644
--- /dev/null
+++ b/roles/nvidia/vars/main.yml
@@ -0,0 +1,7 @@
+---
+# NOTE(review): these Bitwarden/Vaultwarden credentials are not referenced by
+# any task in roles/nvidia/tasks/main.yml; confirm whether this role needs them.
+bw_password: "{{ lookup('env', 'BW_PASSWORD') }}"
+VAULTWARDEN_LINK: "{{ lookup('env', 'VAULTWARDEN_LINK') }}"
+BW_CLIENTID: "{{ lookup('env', 'BW_CLIENTID') }}"
+BW_CLIENTSECRET: "{{ lookup('env', 'BW_CLIENTSECRET') }}"