Mirror of https://github.com/KevinMidboe/immich.git, synced 2025-10-29 17:40:28 +00:00
	feat(ml)!: customizable ML settings (#3891)
* consolidated endpoints, added live configuration
* added ml settings to server
* added settings dashboard
* updated deps, fixed typos
* simplified modelconfig, updated tests
* added ml setting accordion for admin page, updated tests
* merge `clipText` and `clipVision`
* added face distance setting, clarified setting
* add clip mode in request, dropdown for face models
* polished ml settings, updated descriptions
* update clip field on error
* removed unused import
* add description for image classification threshold
* pin safetensors for arm wheel, updated poetry lock
* moved dto
* set model type only in ml repository
* revert form-data package install, use fetch instead of axios
* added slotted description with link, updated facial recognition description, clarified effect of disabling tasks
* validation before model load
* removed unnecessary getconfig call
* added migration
* updated api

---------

Co-authored-by: Alex Tran <alex.tran1502@gmail.com>
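The headline change for the ML service is that CLIP encoding becomes mode-aware: a single encoder can be restricted to the text or vision half of the model, and requests that don't match the configured mode are rejected. Below is a minimal sketch of that idea, not the PR's implementation: only the mode check and its error message mirror the diff further down, while the class shape, model name, and returned vectors are illustrative stand-ins.

from typing import Literal


class CLIPEncoder:
    def __init__(self, model_name: str, mode: Literal["text", "vision"] | None = None) -> None:
        self.model_name = model_name
        self.mode = mode  # None allows both text and vision inputs

    def predict(self, image_or_text: bytes | str) -> list[float]:
        match image_or_text:
            case bytes():
                if self.mode == "text":
                    raise TypeError("Cannot encode image as text-only model")
                return [0.0]  # stand-in for the vision tower's embedding
            case str():
                if self.mode == "vision":
                    raise TypeError("Cannot encode text as vision-only model")
                return [1.0]  # stand-in for the text tower's embedding
            case _:
                raise TypeError(f"unsupported input: {type(image_or_text)}")


encoder = CLIPEncoder("ViT-B-32::openai", mode="text")
print(encoder.predict("a photo of a dog"))  # fine: [1.0]
try:
    encoder.predict(b"\x89PNG...")
except TypeError as err:
    print(err)  # Cannot encode image as text-only model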
@@ -1,5 +1,6 @@
 import os
 import zipfile
+from io import BytesIO
 from typing import Any, Literal
 
 import onnxruntime as ort
@@ -8,7 +9,7 @@ from clip_server.model.clip import BICUBIC, _convert_image_to_rgb
 from clip_server.model.clip_onnx import _MODELS, _S3_BUCKET_V2, CLIPOnnxModel, download_model
 from clip_server.model.pretrained_models import _VISUAL_MODEL_IMAGE_SIZE
 from clip_server.model.tokenization import Tokenizer
-from PIL.Image import Image
+from PIL import Image
 from torchvision.transforms import CenterCrop, Compose, Normalize, Resize, ToTensor
 
 from ..schemas import ModelType
@@ -74,9 +75,12 @@ class CLIPEncoder(InferenceModel):
             image_size = _VISUAL_MODEL_IMAGE_SIZE[CLIPOnnxModel.get_model_name(self.model_name)]
             self.transform = _transform_pil_image(image_size)
 
-    def _predict(self, image_or_text: Image | str) -> list[float]:
+    def _predict(self, image_or_text: Image.Image | str) -> list[float]:
+        if isinstance(image_or_text, bytes):
+            image_or_text = Image.open(BytesIO(image_or_text))
+
         match image_or_text:
-            case Image():
+            case Image.Image():
                 if self.mode == "text":
                     raise TypeError("Cannot encode image as text-only model")
                 pixel_values = self.transform(image_or_text)
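The import swap drives most of this hunk: the old code imported the class directly (`from PIL.Image import Image`), but the new bytes-decoding branch needs `Image.open`, which lives on the module, so the module is imported instead (`from PIL import Image`) and the class must now be spelled `Image.Image` in both the type annotation and the `match` class pattern. A standalone Pillow illustration of the same pattern follows; `describe` is a made-up helper for this sketch, not Immich code.

from io import BytesIO

from PIL import Image


def describe(image_or_text: Image.Image | bytes | str) -> str:
    # Raw bytes are decoded to a PIL image first, as in the new _predict.
    if isinstance(image_or_text, bytes):
        image_or_text = Image.open(BytesIO(image_or_text))

    match image_or_text:
        case Image.Image():  # class pattern needs the class: module.Class
            return f"image of size {image_or_text.size}"
        case str():
            return f"text {image_or_text!r}"
        case _:
            raise TypeError(f"unsupported input: {type(image_or_text)}")


buf = BytesIO()
Image.new("RGB", (8, 8)).save(buf, format="PNG")
print(describe(buf.getvalue()))      # image of size (8, 8)
print(describe("a photo of a dog"))  # text 'a photo of a dog'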