mirror of
https://gitea.fenix-dev.com/fenix-gitea-admin/iac-ansible-private.git
synced 2026-03-22 04:19:48 +00:00
Compare commits
11 Commits
40b8d89ee7
...
cb1bf070b0
| Author | SHA1 | Date | |
|---|---|---|---|
| cb1bf070b0 | |||
| 8dec820f1d | |||
| 31f624c429 | |||
| 955d1ac2f2 | |||
| 4377f5917c | |||
| 4eb7726ed8 | |||
| 369b8dae80 | |||
| 1c24b3e030 | |||
| 607a6b82c3 | |||
| 3c4fab1534 | |||
| 4b1fa18f7f |
@ -6,7 +6,7 @@
|
|||||||
- vaultwarden
|
- vaultwarden
|
||||||
- kubernetes
|
- kubernetes
|
||||||
- kube-master
|
- kube-master
|
||||||
- nvidia
|
- nvidia-base
|
||||||
|
|
||||||
- name: Configure Kubernetes Nodes
|
- name: Configure Kubernetes Nodes
|
||||||
hosts: workers
|
hosts: workers
|
||||||
@ -16,12 +16,13 @@
|
|||||||
- vaultwarden
|
- vaultwarden
|
||||||
- kubernetes
|
- kubernetes
|
||||||
- kube-node
|
- kube-node
|
||||||
- nvidia
|
- nvidia-base
|
||||||
|
|
||||||
- name: Configure Kubernetes Data
|
- name: Configure Kubernetes Data
|
||||||
hosts: master
|
hosts: master
|
||||||
become: yes
|
become: yes
|
||||||
roles:
|
roles:
|
||||||
|
- nvidia
|
||||||
- stolon
|
- stolon
|
||||||
- cloudflared
|
- cloudflared
|
||||||
- qbittorrent
|
- qbittorrent
|
||||||
|
|||||||
66
roles/nvidia-base/tasks/main.yml
Normal file
66
roles/nvidia-base/tasks/main.yml
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# Ensure the nvidia-driver-550 apt package is installed.
# `state: present` installs it only when it is missing.
- name: Instalar driver NVIDIA
  ansible.builtin.apt:
    state: present
    name: nvidia-driver-550
  become: true
|
||||||
|
|
||||||
|
# Ensure the nvidia-utils-550 apt package is installed.
- name: Instalar utilitários NVIDIA
  ansible.builtin.apt:
    state: present
    name: nvidia-utils-550
  become: true
|
||||||
|
|
||||||
|
|
||||||
|
# Reboot only when the NVIDIA kernel module is not loaded yet.
# The original task rebooted unconditionally, i.e. on every play run.
# /proc/driver/nvidia/version is exposed by the loaded NVIDIA kernel module;
# NOTE(review): hosts where the module can never load (no GPU, blocked by
# Secure Boot) will still reboot on each run, as before - confirm acceptable.
- name: Verificar se o módulo NVIDIA já está carregado
  ansible.builtin.stat:
    path: /proc/driver/nvidia/version
  register: nvidia_base_module_proc

- name: Reboot host e esperar voltar
  become: true
  ansible.builtin.reboot:
    reboot_timeout: 600  # maximum time to wait for the host to come back (seconds)
    test_command: whoami  # command used to confirm the host is back
  when: not nvidia_base_module_proc.stat.exists
|
||||||
|
|
||||||
|
# Download NVIDIA's libnvidia-container signing key and store it de-armored
# under /usr/share/keyrings.
# Runs as root via `become`, so no nested `sudo` (and no dependency on the
# `fenix` user having passwordless sudo) is needed.
# The key is written to a temporary file first and moved into place only on
# success: with `creates`, a truncated key file left behind by a failed
# download would otherwise cause the task to be skipped on every later run.
- name: Adicionar chave GPG da NVIDIA
  become: true
  ansible.builtin.shell:
    cmd: |
      set -euo pipefail
      tmp_key="$(mktemp)"
      trap 'rm -f "${tmp_key}"' EXIT
      curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --batch --yes --dearmor -o "${tmp_key}"
      install -m 0644 "${tmp_key}" /usr/share/keyrings/nvidia-container-toolkit.gpg
    executable: /bin/bash  # `pipefail` is a bash option, not POSIX sh
    creates: /usr/share/keyrings/nvidia-container-toolkit.gpg
|
||||||
|
|
||||||
|
# Register the NVIDIA Container Toolkit apt repository.
# Steps: fetch and de-armor the signing key into its own keyring, download the
# upstream .list file, pin every `deb` entry to that keyring via `signed-by`,
# then install the result into /etc/apt/sources.list.d.
# Hardening over the original one-liner:
#   * `set -euo pipefail` + `curl -f`: a failed download aborts the task
#     instead of writing an empty/HTML list file that `creates` would then
#     treat as "already done" forever.
#   * `gpg --batch --yes`: no interactive overwrite prompt if the keyring was
#     left behind by an earlier partial run.
#   * the list is staged in a temp file outside sources.list.d and installed
#     only on success.
#   * no nested `sudo`; the task already runs as root through `become`.
- name: Adicionar repositório NVIDIA
  become: true
  ansible.builtin.shell:
    cmd: |
      set -euo pipefail
      keyring=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
      list=/etc/apt/sources.list.d/nvidia-container-toolkit.list
      tmp_list="$(mktemp)"
      trap 'rm -f "${tmp_list}"' EXIT
      curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --batch --yes --dearmor -o "${keyring}"
      curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
        | sed "s#deb https://#deb [signed-by=${keyring}] https://#g" > "${tmp_list}"
      install -m 0644 "${tmp_list}" "${list}"
    executable: /bin/bash  # `pipefail` is a bash option, not POSIX sh
    creates: /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||||
|
|
||||||
|
# Refresh the apt package index (equivalent of `apt-get update`).
- name: Atualizar cache de pacotes
  ansible.builtin.apt:
    update_cache: true
  become: true
|
||||||
|
|
||||||
|
# Ensure the nvidia-container-toolkit apt package is installed.
- name: Instalar NVIDIA Container Toolkit
  ansible.builtin.apt:
    state: present
    name: nvidia-container-toolkit
  become: true
|
||||||
|
|
||||||
|
|
||||||
|
# Run `nvidia-ctk runtime configure --runtime=containerd` as root.
# Uses `command` with an argv list instead of `shell`: the invocation needs no
# shell features, so it is executed directly without shell parsing.
# The tool's output is not inspected, so the task is explicitly reported as
# changed on every run (same as the module default, stated for clarity).
- name: Configurar runtime containerd
  become: true
  ansible.builtin.command:
    argv:
      - nvidia-ctk
      - runtime
      - configure
      - --runtime=containerd
  changed_when: true
|
||||||
|
|
||||||
|
# Restart the containerd systemd unit.
# `state: restarted` restarts the service on every run of this task.
- name: Reiniciar containerd
  ansible.builtin.systemd:
    state: restarted
    name: containerd
  become: true
|
||||||
|
|
||||||
|
|
||||||
|
# Run `nvidia-smi` on the host and print its output.
# The original debug task printed `nvidia_smi_output`, but no task in this
# file registered that variable; the task below now registers it.
# The check is informational: it never marks the host changed and never fails
# the play (e.g. when the binary is missing or no GPU is present).
- name: Executar nvidia-smi no host
  ansible.builtin.command:
    argv:
      - nvidia-smi
  register: nvidia_smi_output
  changed_when: false
  failed_when: false

# `become` is dropped: debug only prints a variable and needs no privileges.
- name: Mostrar resultado nvidia-smi
  ansible.builtin.debug:
    var: nvidia_smi_output.stdout
|
||||||
|
|
||||||
@ -1,94 +1,7 @@
|
|||||||
- name: Instalar as nvidia drivers
|
- name: Aplicar o stolon
|
||||||
become: yes
|
become: yes
|
||||||
become_user: fenix
|
become_user: fenix
|
||||||
shell: |
|
shell: |
|
||||||
sudo apt install nvidia-driver-550
|
kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.1/deployments/static/nvidia-device-plugin.yml
|
||||||
sudo apt install nvidia-utils-550
|
|
||||||
environment:
|
environment:
|
||||||
KUBECONFIG: /home/fenix/.kube/config
|
KUBECONFIG: /home/fenix/.kube/config
|
||||||
|
|
||||||
- name: Reboot remoto e continuar
|
|
||||||
become: yes
|
|
||||||
become_user: fenix
|
|
||||||
tasks:
|
|
||||||
- name: Reboot host e esperar voltar
|
|
||||||
ansible.builtin.reboot:
|
|
||||||
reboot_timeout: 600 # tempo máximo para o host voltar (segundos)
|
|
||||||
test_command: whoami # comando usado para validar que o host voltou
|
|
||||||
|
|
||||||
|
|
||||||
- name: Adicionar chave GPG da NVIDIA
|
|
||||||
become: yes
|
|
||||||
become_user: fenix
|
|
||||||
ansible.builtin.shell: |
|
|
||||||
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit.gpg
|
|
||||||
args:
|
|
||||||
creates: /usr/share/keyrings/nvidia-container-toolkit.gpg
|
|
||||||
|
|
||||||
|
|
||||||
- name: Adicionar repositório NVIDIA
|
|
||||||
become: yes
|
|
||||||
become_user: fenix
|
|
||||||
ansible.builtin.shell: |
|
|
||||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/ubuntu22.04/amd64/nvidia-container-toolkit.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit.gpg] https://#' | tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
|
||||||
args:
|
|
||||||
creates: /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
|
||||||
|
|
||||||
|
|
||||||
- name: Atualizar cache de pacotes
|
|
||||||
become: yes
|
|
||||||
become_user: fenix
|
|
||||||
ansible.builtin.apt:
|
|
||||||
update_cache: yes
|
|
||||||
|
|
||||||
- name: Instalar NVIDIA Container Toolkit
|
|
||||||
become: yes
|
|
||||||
become_user: fenix
|
|
||||||
ansible.builtin.apt:
|
|
||||||
name: nvidia-container-toolkit
|
|
||||||
state: present
|
|
||||||
|
|
||||||
|
|
||||||
- name: Configurar runtime Docker
|
|
||||||
become: yes
|
|
||||||
become_user: fenix
|
|
||||||
ansible.builtin.shell: |
|
|
||||||
nvidia-ctk runtime configure --runtime=docker
|
|
||||||
when: "'docker.io' in ansible_facts.packages"
|
|
||||||
|
|
||||||
- name: Reiniciar Docker
|
|
||||||
become: yes
|
|
||||||
become_user: fenix
|
|
||||||
ansible.builtin.systemd:
|
|
||||||
name: docker
|
|
||||||
state: restarted
|
|
||||||
when: "'docker.io' in ansible_facts.packages"
|
|
||||||
|
|
||||||
- name: Configurar runtime containerd
|
|
||||||
become: yes
|
|
||||||
become_user: fenix
|
|
||||||
ansible.builtin.shell: |
|
|
||||||
nvidia-ctk runtime configure --runtime=containerd
|
|
||||||
when: "'containerd' in ansible_facts.packages"
|
|
||||||
|
|
||||||
- name: Reiniciar containerd
|
|
||||||
become: yes
|
|
||||||
become_user: fenix
|
|
||||||
ansible.builtin.systemd:
|
|
||||||
name: containerd
|
|
||||||
state: restarted
|
|
||||||
when: "'containerd' in ansible_facts.packages"
|
|
||||||
|
|
||||||
- name: Validar GPU dentro de container
|
|
||||||
become: yes
|
|
||||||
become_user: fenix
|
|
||||||
ansible.builtin.shell: |
|
|
||||||
docker run --rm --runtime=nvidia --gpus all nvidia/cuda:12.2-base nvidia-smi
|
|
||||||
register: nvidia_smi_output
|
|
||||||
ignore_errors: true
|
|
||||||
|
|
||||||
- name: Mostrar resultado nvidia-smi
|
|
||||||
become: yes
|
|
||||||
become_user: fenix
|
|
||||||
ansible.builtin.debug:
|
|
||||||
var: nvidia_smi_output.stdout
|
|
||||||
@ -1,4 +0,0 @@
|
|||||||
bw_password: "{{ lookup('env', 'BW_PASSWORD') }}"
|
|
||||||
VAULTWARDEN_LINK: "{{ lookup('env', 'VAULTWARDEN_LINK') }}"
|
|
||||||
BW_CLIENTID: "{{ lookup('env', 'BW_CLIENTID') }}"
|
|
||||||
BW_CLIENTSECRET : "{{ lookup('env', 'BW_CLIENTSECRET') }}"
|
|
||||||
Reference in New Issue
Block a user