From cb1bf070b0c89aff8d56f5cfc29dcb4fe9a269f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Limpinho?= <53994778+TomasLimpinho@users.noreply.github.com> Date: Mon, 8 Dec 2025 19:31:29 +0000 Subject: [PATCH] nvidia-base vsnvidia frfom kubernetes --- playbook.yml | 5 ++- roles/nvidia-base/tasks/main.yml | 66 +++++++++++++++++++++++++++++++ roles/nvidia/tasks/main.yml | 68 -------------------------------- roles/nvidia/vars/main.yml | 4 -- 4 files changed, 69 insertions(+), 74 deletions(-) create mode 100644 roles/nvidia-base/tasks/main.yml delete mode 100644 roles/nvidia/vars/main.yml diff --git a/playbook.yml b/playbook.yml index fd43edb..d532ad3 100644 --- a/playbook.yml +++ b/playbook.yml @@ -6,7 +6,7 @@ - vaultwarden - kubernetes - kube-master - - nvidia + - nvidia-base - name: Configure Kubernetes Nodes hosts: workers @@ -16,12 +16,13 @@ - vaultwarden - kubernetes - kube-node - - nvidia + - nvidia-base - name: Configure Kubernetes Data hosts: master become: yes roles: + - nvidia - stolon - cloudflared - qbittorrent diff --git a/roles/nvidia-base/tasks/main.yml b/roles/nvidia-base/tasks/main.yml new file mode 100644 index 0000000..ae53e81 --- /dev/null +++ b/roles/nvidia-base/tasks/main.yml @@ -0,0 +1,66 @@ +- name: Instalar driver NVIDIA + become: yes + ansible.builtin.apt: + name: nvidia-driver-550 + state: present + +- name: Instalar utilitários NVIDIA + become: yes + ansible.builtin.apt: + name: nvidia-utils-550 + state: present + + +- name: Reboot host e esperar voltar + become: yes + ansible.builtin.reboot: + reboot_timeout: 600 # tempo máximo para o host voltar (segundos) + test_command: whoami # comando usado para validar que o host voltou + +- name: Adicionar chave GPG da NVIDIA + become: yes + become_user: fenix + ansible.builtin.shell: | + curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit.gpg + args: + creates: /usr/share/keyrings/nvidia-container-toolkit.gpg + +- name: Adicionar repositório NVIDIA + become: yes + ansible.builtin.shell: | + curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \ + && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \ + sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ + sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list + args: + creates: /etc/apt/sources.list.d/nvidia-container-toolkit.list + +- name: Atualizar cache de pacotes + become: yes + ansible.builtin.apt: + update_cache: yes + +- name: Instalar NVIDIA Container Toolkit + become: yes + ansible.builtin.apt: + name: nvidia-container-toolkit + state: present + + +- name: Configurar runtime containerd + become: yes + ansible.builtin.shell: | + nvidia-ctk runtime configure --runtime=containerd + +- name: Reiniciar containerd + become: yes + ansible.builtin.systemd: + name: containerd + state: restarted + + +- name: Mostrar resultado nvidia-smi + become: yes + ansible.builtin.debug: + var: nvidia_smi_output.stdout + diff --git a/roles/nvidia/tasks/main.yml b/roles/nvidia/tasks/main.yml index b55e8e0..268fe03 100644 --- a/roles/nvidia/tasks/main.yml +++ b/roles/nvidia/tasks/main.yml @@ -1,71 +1,3 @@ -- name: Instalar driver NVIDIA - become: yes - ansible.builtin.apt: - name: nvidia-driver-550 - state: present - -- name: Instalar utilitários NVIDIA - become: yes - ansible.builtin.apt: - name: nvidia-utils-550 - state: present - - -- name: Reboot host e esperar voltar - become: yes - ansible.builtin.reboot: - reboot_timeout: 600 # tempo máximo para o host voltar (segundos) - test_command: whoami # comando usado para validar que o host voltou - -- name: Adicionar chave GPG da NVIDIA - become: yes - become_user: fenix - ansible.builtin.shell: | - curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit.gpg - args: - creates: /usr/share/keyrings/nvidia-container-toolkit.gpg - -- name: Adicionar repositório NVIDIA - become: yes - ansible.builtin.shell: | - curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \ - && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \ - sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ - sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list - args: - creates: /etc/apt/sources.list.d/nvidia-container-toolkit.list - -- name: Atualizar cache de pacotes - become: yes - ansible.builtin.apt: - update_cache: yes - -- name: Instalar NVIDIA Container Toolkit - become: yes - ansible.builtin.apt: - name: nvidia-container-toolkit - state: present - - -- name: Configurar runtime containerd - become: yes - ansible.builtin.shell: | - nvidia-ctk runtime configure --runtime=containerd - -- name: Reiniciar containerd - become: yes - ansible.builtin.systemd: - name: containerd - state: restarted - - -- name: Mostrar resultado nvidia-smi - become: yes - ansible.builtin.debug: - var: nvidia_smi_output.stdout - - - - name: Aplicar o stolon become: yes become_user: fenix diff --git a/roles/nvidia/vars/main.yml b/roles/nvidia/vars/main.yml deleted file mode 100644 index c8fe60b..0000000 --- a/roles/nvidia/vars/main.yml +++ /dev/null @@ -1,4 +0,0 @@ -bw_password: "{{ lookup('env', 'BW_PASSWORD') }}" -VAULTWARDEN_LINK: "{{ lookup('env', 'VAULTWARDEN_LINK') }}" -BW_CLIENTID: "{{ lookup('env', 'BW_CLIENTID') }}" -BW_CLIENTSECRET : "{{ lookup('env', 'BW_CLIENTSECRET') }}" \ No newline at end of file