nvidia in all nodes

This commit is contained in:
Tomás Limpinho
2025-12-08 16:25:54 +00:00
parent bdda2191c4
commit 40b8d89ee7
3 changed files with 101 additions and 1 deletions

View File

@ -6,6 +6,7 @@
- vaultwarden - vaultwarden
- kubernetes - kubernetes
- kube-master - kube-master
- nvidia
- name: Configure Kubernetes Nodes - name: Configure Kubernetes Nodes
hosts: workers hosts: workers
@ -15,7 +16,8 @@
- vaultwarden - vaultwarden
- kubernetes - kubernetes
- kube-node - kube-node
- nvidia
- name: Configure Kubernetes Data - name: Configure Kubernetes Data
hosts: master hosts: master
become: yes become: yes

View File

@ -0,0 +1,94 @@
# Install the NVIDIA 550 driver and its userspace utilities through the apt
# module. The module is non-interactive, so it does not stop at the
# confirmation prompt that a bare `apt install` shows, and it reports
# "changed" only when a package was actually installed.
- name: Instalar as nvidia drivers
  become: true
  become_user: root  # package installation needs root, not the fenix user
  ansible.builtin.apt:
    name:
      - nvidia-driver-550
      - nvidia-utils-550
    state: present
    update_cache: true
  register: nvidia_driver_install

# Reboot and wait for the host to come back. This is a plain task: `tasks:`
# is a play-level keyword and is not accepted inside a task entry.
# The reboot is limited to runs where the driver packages changed, so
# re-running the role does not restart every node again.
- name: Reboot host e esperar voltar
  become: true
  become_user: root
  ansible.builtin.reboot:
    reboot_timeout: 600  # maximum time to wait for the host to return (seconds)
    test_command: whoami  # command used to confirm the host is back
  when: nvidia_driver_install is changed
# Download the NVIDIA libnvidia-container signing key and store it in
# binary (dearmored) form under /usr/share/keyrings.
# The key is fetched into a temporary file first, so a failed download
# aborts the script before the keyring file exists; otherwise `creates`
# would treat a broken or empty keyring as "already done" on later runs.
- name: Adicionar chave GPG da NVIDIA
  become: true
  become_user: root  # writes under /usr/share/keyrings; no inner sudo needed
  ansible.builtin.shell: |
    set -euo pipefail
    tmp_key="$(mktemp)"
    trap 'rm -f "$tmp_key"' EXIT
    curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey -o "$tmp_key"
    gpg --batch --dearmor -o /usr/share/keyrings/nvidia-container-toolkit.gpg "$tmp_key"
  args:
    executable: /bin/bash  # `set -o pipefail` and `trap ... EXIT` need bash
    creates: /usr/share/keyrings/nvidia-container-toolkit.gpg
# Write the NVIDIA Container Toolkit apt source list, pinning every `deb`
# entry to the keyring at /usr/share/keyrings/nvidia-container-toolkit.gpg.
# Runs as root because the target lives in /etc/apt/sources.list.d.
# The list is rendered into a temporary file and only installed when the
# whole pipeline succeeded (`curl -f` fails on HTTP errors, `pipefail`
# propagates that), so an error page never ends up as an apt source and
# `creates` never masks a half-written file.
# NOTE(review): the URL is the generic deb list from NVIDIA's install guide;
# confirm it is the repository intended for these hosts.
- name: Adicionar repositório NVIDIA
  become: true
  become_user: root
  ansible.builtin.shell: |
    set -euo pipefail
    tmp_list="$(mktemp)"
    trap 'rm -f "$tmp_list"' EXIT
    curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit.gpg] https://#g' > "$tmp_list"
    install -m 0644 "$tmp_list" /etc/apt/sources.list.d/nvidia-container-toolkit.list
  args:
    executable: /bin/bash  # `set -o pipefail` and `trap ... EXIT` need bash
    creates: /etc/apt/sources.list.d/nvidia-container-toolkit.list
# Refresh the apt package index so packages from newly added sources are
# visible. Runs as root: the apt module cannot update the cache as the
# unprivileged fenix user.
- name: Atualizar cache de pacotes
  become: true
  become_user: root
  ansible.builtin.apt:
    update_cache: true
# Install the nvidia-container-toolkit package (idempotent: no change when
# it is already present). Runs as root because package installation is not
# possible as the unprivileged fenix user.
- name: Instalar NVIDIA Container Toolkit
  become: true
  become_user: root
  ansible.builtin.apt:
    name: nvidia-container-toolkit
    state: present
# Register the NVIDIA runtime with whichever container engine is installed
# and restart that engine so the new configuration is loaded.
#
# The `when` conditions below read ansible_facts.packages, which only exists
# after package_facts has run, so the facts are gathered here first.
- name: Recolher factos dos pacotes instalados
  ansible.builtin.package_facts:
    manager: auto

# nvidia-ctk and the service restarts run as root rather than as the
# unprivileged fenix user. `command` is used instead of `shell` because no
# shell features are involved.
- name: Configurar runtime Docker
  become: true
  become_user: root
  ansible.builtin.command:
    cmd: nvidia-ctk runtime configure --runtime=docker
  changed_when: true  # the tool gives no change signal; treat every run as a change
  when: "'docker.io' in ansible_facts.packages"

- name: Reiniciar Docker
  become: true
  become_user: root
  ansible.builtin.systemd:
    name: docker
    state: restarted
  when: "'docker.io' in ansible_facts.packages"

# NOTE(review): this matches the distro package name `containerd`; hosts that
# install `containerd.io` from Docker's repository would be skipped — confirm.
- name: Configurar runtime containerd
  become: true
  become_user: root
  ansible.builtin.command:
    cmd: nvidia-ctk runtime configure --runtime=containerd
  changed_when: true  # the tool gives no change signal; treat every run as a change
  when: "'containerd' in ansible_facts.packages"

- name: Reiniciar containerd
  become: true
  become_user: root
  ansible.builtin.systemd:
    name: containerd
    state: restarted
  when: "'containerd' in ansible_facts.packages"
# Smoke test: run nvidia-smi inside a CUDA base container using the NVIDIA
# runtime and keep the result in nvidia_smi_output.
# Best effort by design: ignore_errors keeps the play going when the check
# fails (for example on hosts without Docker).
# The image tag uses the fully qualified `<cuda version>-base-<os>` form.
# NOTE(review): confirm the CUDA/OS combination wanted for this check.
- name: Validar GPU dentro de container
  become: true
  become_user: root  # avoids depending on fenix being in the docker group
  ansible.builtin.command:
    cmd: >-
      docker run --rm --runtime=nvidia --gpus all
      nvidia/cuda:12.2.0-base-ubuntu22.04 nvidia-smi
  register: nvidia_smi_output
  changed_when: false  # read-only check; never report a change
  ignore_errors: true
# Print the stdout captured in nvidia_smi_output so the nvidia-smi result is
# visible in the play output.
- name: Mostrar resultado nvidia-smi
  ansible.builtin.debug:
    var: nvidia_smi_output.stdout
  become_user: fenix
  become: true

View File

@ -0,0 +1,4 @@
# Values read from environment variables through the `env` lookup, so no
# secret is stored in the repository. Each variable is resolved from the
# environment variable named in its lookup.
bw_password: "{{ lookup('env', 'BW_PASSWORD') }}"
VAULTWARDEN_LINK: "{{ lookup('env', 'VAULTWARDEN_LINK') }}"
BW_CLIENTID: "{{ lookup('env', 'BW_CLIENTID') }}"
BW_CLIENTSECRET: "{{ lookup('env', 'BW_CLIENTSECRET') }}"