Compare commits

...

11 Commits

Author SHA1 Message Date
cb1bf070b0 nvidia-base vsnvidia frfom kubernetes 2025-12-08 19:31:29 +00:00
8dec820f1d nvidia for kubernetes 2025-12-08 19:01:39 +00:00
31f624c429 containerd 2025-12-08 18:47:24 +00:00
955d1ac2f2 apt install 2025-12-08 18:25:47 +00:00
4377f5917c list packages corrected 2025-12-08 17:50:47 +00:00
4eb7726ed8 creaters 2025-12-08 17:42:58 +00:00
369b8dae80 correctioin adding repositorio nvidia 2025-12-08 17:27:56 +00:00
1c24b3e030 fenix no more 2025-12-08 17:08:37 +00:00
607a6b82c3 Adicionar repositório NVIDIA 2025-12-08 16:58:05 +00:00
3c4fab1534 correction task nvidia 2025-12-08 16:35:49 +00:00
4b1fa18f7f a 2025-12-08 16:31:39 +00:00
4 changed files with 72 additions and 96 deletions

View File

@ -6,7 +6,7 @@
- vaultwarden
- kubernetes
- kube-master
- nvidia
- nvidia-base
- name: Configure Kubernetes Nodes
hosts: workers
@ -16,12 +16,13 @@
- vaultwarden
- kubernetes
- kube-node
- nvidia
- nvidia-base
- name: Configure Kubernetes Data
hosts: master
become: yes
roles:
- nvidia
- stolon
- cloudflared
- qbittorrent

View File

@ -0,0 +1,66 @@
# Install the NVIDIA 550 driver and its utilities in a single apt transaction.
# The result is registered so the reboot below only happens when a package
# was actually installed or upgraded.
- name: Instalar driver e utilitários NVIDIA
  become: true
  ansible.builtin.apt:
    name:
      - nvidia-driver-550
      - nvidia-utils-550
    state: present
  register: nvidia_driver_install

# Reboot and wait for the host to come back, but only when the driver
# packages changed; previously this rebooted the node on every play run.
- name: Reboot host e esperar voltar
  become: true
  ansible.builtin.reboot:
    reboot_timeout: 600  # maximum time to wait for the host to return (seconds)
    test_command: whoami  # command used to verify that the host is back
  when: nvidia_driver_install is changed
# Fetch the NVIDIA container-toolkit signing key and store it de-armored at
# the exact path the repository entry below references via signed-by.
# Runs as root through become, so no inner sudo is needed.
# pipefail makes a failed download fail the task instead of being masked by
# the exit status of the last pipeline stage.
- name: Adicionar chave GPG da NVIDIA
  become: true
  ansible.builtin.shell: |
    set -o pipefail
    curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
      | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
  args:
    executable: /bin/bash
    creates: /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg

# Write the apt source list, pinning every "deb" entry to the keyring
# installed by the previous task. The key is no longer downloaded a second
# time here.
- name: Adicionar repositório NVIDIA
  become: true
  ansible.builtin.shell: |
    set -o pipefail
    curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
      | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
      > /etc/apt/sources.list.d/nvidia-container-toolkit.list
  args:
    executable: /bin/bash
    creates: /etc/apt/sources.list.d/nvidia-container-toolkit.list
# Refresh the apt package index (run as root).
- name: Atualizar cache de pacotes
  ansible.builtin.apt:
    update_cache: true
  become: true
# Ensure the nvidia-container-toolkit package is installed (run as root).
- name: Instalar NVIDIA Container Toolkit
  ansible.builtin.apt:
    state: present
    name: nvidia-container-toolkit
  become: true
# Register the NVIDIA runtime in containerd's configuration.
# A single command with no pipes or redirects does not need a shell, so the
# command module is used with an explicit argv.
- name: Configurar runtime containerd
  become: true
  ansible.builtin.command:
    argv:
      - nvidia-ctk
      - runtime
      - configure
      - --runtime=containerd
  # NOTE(review): no change indication is parsed from nvidia-ctk here, so the
  # task conservatively reports "changed" on every run.
  changed_when: true
# Restart the containerd service (run as root).
- name: Reiniciar containerd
  ansible.builtin.systemd:
    state: restarted
    name: containerd
  become: true
# Run nvidia-smi on the host and keep its output for the debug task below.
# Nothing else in this task file registers nvidia_smi_output, so without this
# task the debug would only print an undefined variable.
# The check is best-effort: a failure is reported but does not stop the play.
- name: Validar GPU com nvidia-smi
  become: true
  ansible.builtin.command:
    cmd: nvidia-smi
  register: nvidia_smi_output
  changed_when: false  # read-only query
  ignore_errors: true

# Print the captured nvidia-smi output; debug runs on the controller, so no
# privilege escalation is needed.
- name: Mostrar resultado nvidia-smi
  ansible.builtin.debug:
    var: nvidia_smi_output.stdout

View File

@ -1,94 +1,7 @@
- name: Instalar as nvidia drivers
- name: Aplicar o stolon
become: yes
become_user: fenix
shell: |
sudo apt install nvidia-driver-550
sudo apt install nvidia-utils-550
kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.1/deployments/static/nvidia-device-plugin.yml
environment:
KUBECONFIG: /home/fenix/.kube/config
- name: Reboot remoto e continuar
become: yes
become_user: fenix
tasks:
- name: Reboot host e esperar voltar
ansible.builtin.reboot:
reboot_timeout: 600 # tempo máximo para o host voltar (segundos)
test_command: whoami # comando usado para validar que o host voltou
- name: Adicionar chave GPG da NVIDIA
become: yes
become_user: fenix
ansible.builtin.shell: |
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit.gpg
args:
creates: /usr/share/keyrings/nvidia-container-toolkit.gpg
- name: Adicionar repositório NVIDIA
become: yes
become_user: fenix
ansible.builtin.shell: |
curl -s -L https://nvidia.github.io/libnvidia-container/stable/ubuntu22.04/amd64/nvidia-container-toolkit.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit.gpg] https://#' | tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
args:
creates: /etc/apt/sources.list.d/nvidia-container-toolkit.list
- name: Atualizar cache de pacotes
become: yes
become_user: fenix
ansible.builtin.apt:
update_cache: yes
- name: Instalar NVIDIA Container Toolkit
become: yes
become_user: fenix
ansible.builtin.apt:
name: nvidia-container-toolkit
state: present
- name: Configurar runtime Docker
become: yes
become_user: fenix
ansible.builtin.shell: |
nvidia-ctk runtime configure --runtime=docker
when: "'docker.io' in ansible_facts.packages"
- name: Reiniciar Docker
become: yes
become_user: fenix
ansible.builtin.systemd:
name: docker
state: restarted
when: "'docker.io' in ansible_facts.packages"
- name: Configurar runtime containerd
become: yes
become_user: fenix
ansible.builtin.shell: |
nvidia-ctk runtime configure --runtime=containerd
when: "'containerd' in ansible_facts.packages"
- name: Reiniciar containerd
become: yes
become_user: fenix
ansible.builtin.systemd:
name: containerd
state: restarted
when: "'containerd' in ansible_facts.packages"
- name: Validar GPU dentro de container
become: yes
become_user: fenix
ansible.builtin.shell: |
docker run --rm --runtime=nvidia --gpus all nvidia/cuda:12.2-base nvidia-smi
register: nvidia_smi_output
ignore_errors: true
- name: Mostrar resultado nvidia-smi
become: yes
become_user: fenix
ansible.builtin.debug:
var: nvidia_smi_output.stdout

View File

@ -1,4 +0,0 @@
bw_password: "{{ lookup('env', 'BW_PASSWORD') }}"
VAULTWARDEN_LINK: "{{ lookup('env', 'VAULTWARDEN_LINK') }}"
BW_CLIENTID: "{{ lookup('env', 'BW_CLIENTID') }}"
BW_CLIENTSECRET : "{{ lookup('env', 'BW_CLIENTSECRET') }}"