refactor(ml): model downloading (#3545)

* download facial recognition models
* download hf models
* simplified logic
* updated `predict` for facial recognition
* ensure download method is called
* fixed repo_id for clip
* fixed download destination
* use st's own `snapshot_download`
* conditional download
* fixed predict method
* check if loaded
* minor fixes
* updated mypy overrides
* added pytest-mock
* updated tests
* updated lock
2026-02-04 23:45:53 +00:00 · 2023-08-05 22:45:13 -04:00
parent 2f26a7edae
commit c73832bd9c
10 changed files with 350 additions and 274 deletions
--- a/machine-learning/app/models/clip.py
+++ b/machine-learning/app/models/clip.py
@@ -1,8 +1,8 @@
-from pathlib import Path
 from typing import Any

 from PIL.Image import Image
 from sentence_transformers import SentenceTransformer
+from sentence_transformers.util import snapshot_download

 from ..schemas import ModelType
 from .base import InferenceModel
@@ -11,12 +11,21 @@ from .base import InferenceModel
 class CLIPSTEncoder(InferenceModel):
    _model_type = ModelType.CLIP

-    def load(self, **model_kwargs: Any) -> None:
+    def _download(self, **model_kwargs: Any) -> None:
+        repo_id = self.model_name if "/" in self.model_name else f"sentence-transformers/{self.model_name}"
+        snapshot_download(
+            cache_dir=self.cache_dir,
+            repo_id=repo_id,
+            library_name="sentence-transformers",
+            ignore_files=["flax_model.msgpack", "rust_model.ot", "tf_model.h5"],
+        )
+
+    def _load(self, **model_kwargs: Any) -> None:
        self.model = SentenceTransformer(
            self.model_name,
            cache_folder=self.cache_dir.as_posix(),
            **model_kwargs,
        )

-    def predict(self, image_or_text: Image | str) -> list[float]:
+    def _predict(self, image_or_text: Image | str) -> list[float]:
        return self.model.encode(image_or_text).tolist()