---
# MetalLB address pool: IPs MetalLB may hand out to LoadBalancer services.
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  name: local-pool-2
  namespace: metallb-system
spec:
  addresses:
    - 192.168.1.100-192.168.1.200
---
# Open WebUI frontend, pointing at the in-cluster Ollama API service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ai-openwebui-deployment
  namespace: fenix-ai
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ai-openwebui
  template:
    metadata:
      labels:
        app: ai-openwebui
    spec:
      containers:
        - name: ai-openwebui
          image: ghcr.io/open-webui/open-webui:cuda
          ports:
            - containerPort: 8080
          env:
            - name: WEBUI_HOST
              value: "0.0.0.0"
            - name: OLLAMA_BASE_URL
              value: "http://ollama-api-svc.fenix-ai.svc.cluster.local:11434"
          volumeMounts:
            - name: ai-openwebui-data
              mountPath: /app/backend/data
      volumes:
        - name: ai-openwebui-data
          persistentVolumeClaim:
            claimName: ai-openwebui-data-pvc
---
# Statically provisioned NFS volume for Open WebUI data.
# NOTE: PersistentVolume is cluster-scoped — the original `namespace:` field
# was removed (kubectl ignores it and warns).
# NOTE(review): this is a static in-tree `nfs:` PV bound via a StorageClass
# whose provisioner is nfs.csi.k8s.io — presumably intentional (pre-created
# share), but confirm the CSI driver is not expected to provision dynamically.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: ai-openwebui-data-pv
spec:
  capacity:
    storage: 40Gi
  storageClassName: ai-openwebui-nfs-csi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  nfs:
    server: 192.168.1.22
    path: /mnt/fenix-main-nas-pool-0/data/k8s-Volumes/k8s-cluster-iac-deployed/ai-openwebui
---
# Claim matching the PV above (same storageClassName, size and access mode).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ai-openwebui-data-pvc
  namespace: fenix-ai
spec:
  storageClassName: ai-openwebui-nfs-csi
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 40Gi
---
# StorageClass for the Open WebUI NFS share.
# NOTE: StorageClass is cluster-scoped — the original `namespace:` field
# was removed (kubectl ignores it and warns).
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ai-openwebui-nfs-csi
provisioner: nfs.csi.k8s.io
parameters:
  server: 192.168.1.22
  share: /mnt/fenix-main-nas-pool-0/data/k8s-Volumes/k8s-cluster-iac-deployed/ai-openwebui
allowVolumeExpansion: true
reclaimPolicy: Retain
---
# In-cluster service for the Open WebUI frontend.
apiVersion: v1
kind: Service
metadata:
  name: ai-openwebui-svc
  namespace: fenix-ai
spec:
  selector:
    app: ai-openwebui
  ports:
    - port: 8080
      targetPort: 8080
  type: ClusterIP  # or LoadBalancer if supported
---
# Ollama inference backend, pinned to one GPU.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama-deployment
  namespace: fenix-ai
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      containers:
        - name: ollama
          # NOTE(review): `latest` tag is mutable — consider pinning a version
          # for reproducible deploys.
          image: ollama/ollama:latest
          ports:
            - containerPort: 11434
          resources:
            limits:
              nvidia.com/gpu: 1  # ensures use of the RTX 4060 Ti
              memory: 17Gi
          env:
            - name: OLLAMA_HOST
              value: "0.0.0.0"
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
            - name: CUDA_VISIBLE_DEVICES
              value: "0"
          volumeMounts:
            - name: ollama-data
              mountPath: /root/.ollama
      volumes:
        - name: ollama-data
          persistentVolumeClaim:
            claimName: ollama-data-pvc
---
# API service for Ollama; consumed by Open WebUI via the cluster DNS name.
apiVersion: v1
kind: Service
metadata:
  name: ollama-api-svc
  namespace: fenix-ai
spec:
  selector:
    app: ollama
  ports:
    - protocol: TCP
      port: 11434
      targetPort: 11434
  type: NodePort  # or LoadBalancer if supported
---
# Statically provisioned NFS volume for Ollama models/data.
# NOTE: PersistentVolume is cluster-scoped — the original `namespace:` field
# was removed (kubectl ignores it and warns).
apiVersion: v1
kind: PersistentVolume
metadata:
  name: ollama-data-pv
spec:
  capacity:
    storage: 40Gi
  storageClassName: ollama-ai-nfs-csi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  nfs:
    server: 192.168.1.22
    path: /mnt/fenix-main-nas-pool-0/data/k8s-Volumes/k8s-cluster-iac-deployed/ollama
---
# Claim matching the Ollama PV above.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-data-pvc
  namespace: fenix-ai
spec:
  storageClassName: ollama-ai-nfs-csi
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 40Gi
---
# StorageClass for the Ollama NFS share.
# NOTE: StorageClass is cluster-scoped — the original `namespace:` field
# was removed (kubectl ignores it and warns).
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ollama-ai-nfs-csi
provisioner: nfs.csi.k8s.io
parameters:
  server: 192.168.1.22
  share: /mnt/fenix-main-nas-pool-0/data/k8s-Volumes/k8s-cluster-iac-deployed/ollama
allowVolumeExpansion: true
reclaimPolicy: Retain