fix(ml): load models in separate threads (#4034)

* load models in thread
* set clip mode logs to debug level
* updated tests
* made fixtures slightly less ugly
* moved responses to json file
* formatting
2025-10-29 17:40:28 +00:00 · 2023-09-09 05:02:44 -04:00
parent f1db257628
commit 258b98c262
9 changed files with 1683 additions and 114 deletions
--- a/machine-learning/app/models/image_classification.py
+++ b/machine-learning/app/models/image_classification.py
@@ -26,7 +26,7 @@ class ImageClassifier(InferenceModel):
        self.min_score = model_kwargs.pop("minScore", min_score)
        super().__init__(model_name, cache_dir, **model_kwargs)

-    def _download(self, **model_kwargs: Any) -> None:
+    def _download(self) -> None:
        snapshot_download(
            cache_dir=self.cache_dir,
            repo_id=self.model_name,
@@ -35,10 +35,10 @@ class ImageClassifier(InferenceModel):
            local_dir_use_symlinks=True,
        )

-    def _load(self, **model_kwargs: Any) -> None:
+    def _load(self) -> None:
        processor = AutoImageProcessor.from_pretrained(self.cache_dir, cache_dir=self.cache_dir)
        model_path = self.cache_dir / "model.onnx"
-        model_kwargs |= {
+        model_kwargs = {
            "cache_dir": self.cache_dir,
            "provider": self.providers[0],
            "provider_options": self.provider_options[0],