# Source: iac-ansible-public/roles/cripto/files/ai-ollama-deployment.yaml
# Retrieved: 2026-04-27 17:37:03 +01:00 (192 lines, 4.1 KiB, YAML)
---
# MetalLB address pool: LAN IP range handed out to LoadBalancer Services.
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  name: local-pool-2
  namespace: metallb-system
spec:
  addresses:
    - 192.168.1.100-192.168.1.200
---
# Open WebUI front-end (CUDA image), backed by the Ollama API Service below
# and an NFS-backed PVC for its application data.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ai-openwebui-deployment
  namespace: fenix-ai
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ai-openwebui
  template:
    metadata:
      labels:
        app: ai-openwebui
    spec:
      containers:
        - name: ai-openwebui
          image: ghcr.io/open-webui/open-webui:cuda
          ports:
            - containerPort: 8080
          env:
            - name: WEBUI_HOST
              value: "0.0.0.0"
            # In-cluster DNS name of the ollama-api-svc Service (port 11434).
            - name: OLLAMA_BASE_URL
              value: "http://ollama-api-svc.fenix-ai.svc.cluster.local:11434"
          volumeMounts:
            - name: ai-openwebui-data
              mountPath: /app/backend/data
      volumes:
        - name: ai-openwebui-data
          persistentVolumeClaim:
            claimName: ai-openwebui-data-pvc
---
# Statically provisioned NFS volume for Open WebUI data.
# NOTE: PersistentVolume is cluster-scoped — the original "namespace: fenix-ai"
# field was invalid (ignored by the API server) and has been removed.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: ai-openwebui-data-pv
spec:
  capacity:
    storage: 40Gi
  storageClassName: ai-openwebui-nfs-csi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  nfs:
    server: 192.168.1.22
    path: /mnt/fenix-main-nas-pool-0/data/k8s-Volumes/k8s-cluster-iac-deployed/ai-openwebui
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: ai-openwebui-data-pvc
namespace: fenix-ai
spec:
storageClassName: ai-openwebui-nfs-csi
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 40Gi
---
# NOTE: StorageClass is cluster-scoped — the original "namespace: fenix-ai"
# field was invalid and has been removed.
# NOTE(review): this class names a CSI provisioner, so a PVC referencing it may
# trigger dynamic provisioning on the NFS share instead of binding the static
# ai-openwebui-data-pv above — confirm which behavior is intended.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ai-openwebui-nfs-csi
provisioner: nfs.csi.k8s.io
parameters:
  server: 192.168.1.22
  share: /mnt/fenix-main-nas-pool-0/data/k8s-Volumes/k8s-cluster-iac-deployed/ai-openwebui
allowVolumeExpansion: true
reclaimPolicy: Retain
---
# Cluster-internal entry point for the Open WebUI pods on port 8080.
apiVersion: v1
kind: Service
metadata:
  name: ai-openwebui-svc
  namespace: fenix-ai
spec:
  selector:
    app: ai-openwebui
  ports:
    - port: 8080
      targetPort: 8080
  type: ClusterIP  # or LoadBalancer if supported (MetalLB pool available)
---
# Ollama model server; requests one NVIDIA GPU and persists models on NFS.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama-deployment
  namespace: fenix-ai
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      containers:
        - name: ollama
          image: ollama/ollama:latest
          ports:
            - containerPort: 11434
          resources:
            limits:
              nvidia.com/gpu: 1  # ensures the RTX 4060 Ti is used
              memory: 17Gi
          env:
            # Listen on all interfaces so the Service can reach the API.
            - name: OLLAMA_HOST
              value: "0.0.0.0"
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
            - name: CUDA_VISIBLE_DEVICES
              value: "0"
          volumeMounts:
            # Default Ollama model/config directory.
            - name: ollama-data
              mountPath: /root/.ollama
      volumes:
        - name: ollama-data
          persistentVolumeClaim:
            claimName: ollama-data-pvc
---
# Exposes the Ollama API (port 11434); consumed by Open WebUI via
# OLLAMA_BASE_URL above.
apiVersion: v1
kind: Service
metadata:
  name: ollama-api-svc
  namespace: fenix-ai
spec:
  selector:
    app: ollama
  ports:
    - protocol: TCP
      port: 11434
      targetPort: 11434
  type: NodePort  # or LoadBalancer if supported (MetalLB pool available)
---
# Statically provisioned NFS volume for Ollama model storage.
# NOTE: PersistentVolume is cluster-scoped — the original "namespace: fenix-ai"
# field was invalid (ignored by the API server) and has been removed.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: ollama-data-pv
spec:
  capacity:
    storage: 40Gi
  storageClassName: ollama-ai-nfs-csi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  nfs:
    server: 192.168.1.22
    path: /mnt/fenix-main-nas-pool-0/data/k8s-Volumes/k8s-cluster-iac-deployed/ollama
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: ollama-data-pvc
namespace: fenix-ai
spec:
storageClassName: ollama-ai-nfs-csi
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 40Gi
---
# NOTE: StorageClass is cluster-scoped — the original "namespace: fenix-ai"
# field was invalid and has been removed.
# NOTE(review): this class names a CSI provisioner, so a PVC referencing it may
# trigger dynamic provisioning on the NFS share instead of binding the static
# ollama-data-pv above — confirm which behavior is intended.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ollama-ai-nfs-csi
provisioner: nfs.csi.k8s.io
parameters:
  server: 192.168.1.22
  share: /mnt/fenix-main-nas-pool-0/data/k8s-Volumes/k8s-cluster-iac-deployed/ollama
allowVolumeExpansion: true
reclaimPolicy: Retain