mirror of
https://github.com/KevinMidboe/immich.git
synced 2025-10-29 17:40:28 +00:00
feat(server): CLIP search integration (#1939)
This commit is contained in:
@@ -15,4 +15,14 @@ export class SmartInfoEntity {
|
||||
|
||||
@Column({ type: 'text', array: true, nullable: true })
|
||||
objects!: string[] | null;
|
||||
|
||||
@Column({
|
||||
type: 'numeric',
|
||||
array: true,
|
||||
nullable: true,
|
||||
// note: migration generator is broken for numeric[], but these _are_ set in the database
|
||||
// precision: 20,
|
||||
// scale: 19,
|
||||
})
|
||||
clipEmbedding!: number[] | null;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
import { MigrationInterface, QueryRunner } from 'typeorm';
|
||||
|
||||
/**
 * Schema migration that adds the `clipEmbedding` column to the `smart_info` table
 * and removes it again on rollback.
 */
export class AddCLIPEncodeDataColumn1677971458822 implements MigrationInterface {
  name = 'AddCLIPEncodeDataColumn1677971458822';

  /** Adds `clipEmbedding` as an array of `numeric(20,19)` values. */
  public async up(queryRunner: QueryRunner): Promise<void> {
    const addColumnSql = `ALTER TABLE "smart_info" ADD "clipEmbedding" numeric(20,19) array`;
    await queryRunner.query(addColumnSql);
  }

  /** Drops the `clipEmbedding` column added by `up`. */
  public async down(queryRunner: QueryRunner): Promise<void> {
    const dropColumnSql = `ALTER TABLE "smart_info" DROP COLUMN "clipEmbedding"`;
    await queryRunner.query(dropColumnSql);
  }
}
|
||||
@@ -1,19 +1,34 @@
|
||||
import { IAlbumRepository } from '@app/domain';
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { In, Repository } from 'typeorm';
|
||||
import { AlbumEntity } from '../entities';
|
||||
|
||||
@Injectable()
|
||||
export class AlbumRepository implements IAlbumRepository {
|
||||
constructor(@InjectRepository(AlbumEntity) private repository: Repository<AlbumEntity>) {}
|
||||
|
||||
/**
 * Finds the albums whose `id` is contained in `ids`, loading the `owner`
 * relation for each match.
 *
 * @param ids album identifiers to look up
 * @returns the matching album entities
 */
getByIds(ids: string[]): Promise<AlbumEntity[]> {
  const idFilter = { id: In(ids) };
  const withOwner = { owner: true };

  return this.repository.find({ where: idFilter, relations: withOwner });
}
|
||||
|
||||
/**
 * Deletes every album row whose `ownerId` equals `userId`.
 *
 * @param userId owner whose albums are removed
 */
async deleteAll(userId: string): Promise<void> {
  const ownedByUser = { ownerId: userId };
  await this.repository.delete(ownedByUser);
}
|
||||
|
||||
/**
 * Returns every album, loading the `owner` relation for each one.
 *
 * @returns all album entities with `owner` populated
 */
getAll(): Promise<AlbumEntity[]> {
  // A single query only: a bare `find()` returned first here, which left the
  // relation-loading query below it unreachable.
  return this.repository.find({
    relations: {
      owner: true,
    },
  });
}
|
||||
|
||||
async save(album: Partial<AlbumEntity>) {
|
||||
|
||||
@@ -1,13 +1,24 @@
|
||||
import { AssetSearchOptions, IAssetRepository } from '@app/domain';
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Not, Repository } from 'typeorm';
|
||||
import { In, Not, Repository } from 'typeorm';
|
||||
import { AssetEntity, AssetType } from '../entities';
|
||||
|
||||
@Injectable()
|
||||
export class AssetRepository implements IAssetRepository {
|
||||
constructor(@InjectRepository(AssetEntity) private repository: Repository<AssetEntity>) {}
|
||||
|
||||
/**
 * Finds the assets whose `id` is contained in `ids`, loading the `exifInfo`,
 * `smartInfo` and `tags` relations for each match.
 *
 * @param ids asset identifiers to look up
 * @returns the matching asset entities
 */
getByIds(ids: string[]): Promise<AssetEntity[]> {
  const idFilter = { id: In(ids) };
  const relations = { exifInfo: true, smartInfo: true, tags: true };

  return this.repository.find({ where: idFilter, relations });
}
|
||||
|
||||
/**
 * Deletes every asset row whose `ownerId` matches the given owner.
 *
 * @param ownerId owner whose assets are removed
 */
async deleteAll(ownerId: string): Promise<void> {
  const ownedBy = { ownerId };
  await this.repository.delete(ownedBy);
}
|
||||
|
||||
@@ -41,6 +41,7 @@ export class JobRepository implements IJobRepository {
|
||||
|
||||
case JobName.OBJECT_DETECTION:
|
||||
case JobName.IMAGE_TAGGING:
|
||||
case JobName.ENCODE_CLIP:
|
||||
await this.machineLearning.add(item.name, item.data);
|
||||
break;
|
||||
|
||||
@@ -73,7 +74,7 @@ export class JobRepository implements IJobRepository {
|
||||
|
||||
case JobName.SEARCH_INDEX_ASSETS:
|
||||
case JobName.SEARCH_INDEX_ALBUMS:
|
||||
await this.searchIndex.add(item.name);
|
||||
await this.searchIndex.add(item.name, {});
|
||||
break;
|
||||
|
||||
case JobName.SEARCH_INDEX_ASSET:
|
||||
|
||||
@@ -14,4 +14,12 @@ export class MachineLearningRepository implements IMachineLearningRepository {
|
||||
/**
 * POSTs `input` to `/object-detection/detect-object` and resolves with the
 * response's `data` payload.
 *
 * @param input request body forwarded unchanged
 * @returns the strings returned by the endpoint
 */
detectObjects(input: MachineLearningInput): Promise<string[]> {
  const pending = client.post<string[]>('/object-detection/detect-object', input);
  return pending.then(({ data }) => data);
}
|
||||
|
||||
/**
 * POSTs `input` to `/sentence-transformer/encode-image` and resolves with the
 * response's `data` payload.
 *
 * @param input request body forwarded unchanged
 * @returns the numeric array returned by the endpoint
 */
encodeImage(input: MachineLearningInput): Promise<number[]> {
  const pending = client.post<number[]>('/sentence-transformer/encode-image', input);
  return pending.then(({ data }) => data);
}
|
||||
|
||||
/**
 * POSTs `{ text: input }` to `/sentence-transformer/encode-text` and resolves
 * with the response's `data` payload.
 *
 * @param input text to encode
 * @returns the numeric array returned by the endpoint
 */
encodeText(input: string): Promise<number[]> {
  const body = { text: input };
  const pending = client.post<number[]>('/sentence-transformer/encode-text', body);
  return pending.then(({ data }) => data);
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { CollectionCreateSchema } from 'typesense/lib/Typesense/Collections';
|
||||
|
||||
export const assetSchemaVersion = 2;
|
||||
export const assetSchemaVersion = 3;
|
||||
export const assetSchema: CollectionCreateSchema = {
|
||||
name: `assets-v${assetSchemaVersion}`,
|
||||
fields: [
|
||||
@@ -29,6 +29,7 @@ export const assetSchema: CollectionCreateSchema = {
|
||||
// smart info
|
||||
{ name: 'smartInfo.objects', type: 'string[]', facet: true, optional: true },
|
||||
{ name: 'smartInfo.tags', type: 'string[]', facet: true, optional: true },
|
||||
{ name: 'smartInfo.clipEmbedding', type: 'float[]', facet: false, optional: true, num_dim: 512 },
|
||||
|
||||
// computed
|
||||
{ name: 'geo', type: 'geopoint', facet: false, optional: true },
|
||||
|
||||
@@ -16,12 +16,7 @@ import { AlbumEntity, AssetEntity } from '../db';
|
||||
import { albumSchema } from './schemas/album.schema';
|
||||
import { assetSchema } from './schemas/asset.schema';
|
||||
|
||||
interface CustomAssetEntity extends AssetEntity {
|
||||
geo?: [number, number];
|
||||
motion?: boolean;
|
||||
}
|
||||
|
||||
function removeNil<T extends Dictionary<any>>(item: T): Partial<T> {
|
||||
function removeNil<T extends Dictionary<any>>(item: T): T {
|
||||
_.forOwn(item, (value, key) => {
|
||||
if (_.isNil(value) || (_.isObject(value) && !_.isDate(value) && _.isEmpty(removeNil(value)))) {
|
||||
delete item[key];
|
||||
@@ -31,6 +26,11 @@ function removeNil<T extends Dictionary<any>>(item: T): Partial<T> {
|
||||
return item;
|
||||
}
|
||||
|
||||
interface CustomAssetEntity extends AssetEntity {
|
||||
geo?: [number, number];
|
||||
motion?: boolean;
|
||||
}
|
||||
|
||||
const schemaMap: Record<SearchCollection, CollectionCreateSchema> = {
|
||||
[SearchCollection.ASSETS]: assetSchema,
|
||||
[SearchCollection.ALBUMS]: albumSchema,
|
||||
@@ -38,24 +38,9 @@ const schemaMap: Record<SearchCollection, CollectionCreateSchema> = {
|
||||
|
||||
const schemas = Object.entries(schemaMap) as [SearchCollection, CollectionCreateSchema][];
|
||||
|
||||
interface SearchUpdateQueue<T = any> {
|
||||
upsert: T[];
|
||||
delete: string[];
|
||||
}
|
||||
|
||||
@Injectable()
|
||||
export class TypesenseRepository implements ISearchRepository {
|
||||
private logger = new Logger(TypesenseRepository.name);
|
||||
private queue: Record<SearchCollection, SearchUpdateQueue> = {
|
||||
[SearchCollection.ASSETS]: {
|
||||
upsert: [],
|
||||
delete: [],
|
||||
},
|
||||
[SearchCollection.ALBUMS]: {
|
||||
upsert: [],
|
||||
delete: [],
|
||||
},
|
||||
};
|
||||
|
||||
private _client: Client | null = null;
|
||||
private get client(): Client {
|
||||
@@ -83,8 +68,6 @@ export class TypesenseRepository implements ISearchRepository {
|
||||
numRetries: 3,
|
||||
connectionTimeoutSeconds: 10,
|
||||
});
|
||||
|
||||
setInterval(() => this.flush(), 5_000);
|
||||
}
|
||||
|
||||
async setup(): Promise<void> {
|
||||
@@ -131,48 +114,27 @@ export class TypesenseRepository implements ISearchRepository {
|
||||
return migrationMap;
|
||||
}
|
||||
|
||||
async index(collection: SearchCollection, item: AssetEntity | AlbumEntity, immediate?: boolean): Promise<void> {
|
||||
const schema = schemaMap[collection];
|
||||
|
||||
if (collection === SearchCollection.ASSETS) {
|
||||
item = this.patchAsset(item as AssetEntity);
|
||||
}
|
||||
|
||||
if (immediate) {
|
||||
await this.client.collections(schema.name).documents().upsert(item);
|
||||
return;
|
||||
}
|
||||
|
||||
this.queue[collection].upsert.push(item);
|
||||
/**
 * Imports album entities into the albums search collection by delegating to
 * `import`.
 *
 * @param items albums to import
 * @param done forwarded unchanged to `import`
 */
async importAlbums(items: AlbumEntity[], done: boolean): Promise<void> {
  const target = SearchCollection.ALBUMS;
  await this.import(target, items, done);
}
|
||||
|
||||
async delete(collection: SearchCollection, id: string, immediate?: boolean): Promise<void> {
|
||||
const schema = schemaMap[collection];
|
||||
|
||||
if (immediate) {
|
||||
await this.client.collections(schema.name).documents().delete(id);
|
||||
return;
|
||||
}
|
||||
|
||||
this.queue[collection].delete.push(id);
|
||||
/**
 * Imports asset entities into the assets search collection by delegating to
 * `import`.
 *
 * @param items assets to import
 * @param done forwarded unchanged to `import`
 */
async importAssets(items: AssetEntity[], done: boolean): Promise<void> {
  const target = SearchCollection.ASSETS;
  await this.import(target, items, done);
}
|
||||
|
||||
async import(collection: SearchCollection, items: AssetEntity[] | AlbumEntity[], done: boolean): Promise<void> {
|
||||
private async import(
|
||||
collection: SearchCollection,
|
||||
items: AlbumEntity[] | AssetEntity[],
|
||||
done: boolean,
|
||||
): Promise<void> {
|
||||
try {
|
||||
const schema = schemaMap[collection];
|
||||
const _items = items.map((item) => {
|
||||
if (collection === SearchCollection.ASSETS) {
|
||||
item = this.patchAsset(item as AssetEntity);
|
||||
}
|
||||
// null values are invalid for typesense documents
|
||||
return removeNil(item);
|
||||
});
|
||||
if (_items.length > 0) {
|
||||
await this.client
|
||||
.collections(schema.name)
|
||||
.documents()
|
||||
.import(_items, { action: 'upsert', dirty_values: 'coerce_or_drop' });
|
||||
if (items.length > 0) {
|
||||
await this.client.collections(schemaMap[collection].name).documents().import(this.patch(collection, items), {
|
||||
action: 'upsert',
|
||||
dirty_values: 'coerce_or_drop',
|
||||
});
|
||||
}
|
||||
|
||||
if (done) {
|
||||
await this.updateAlias(collection);
|
||||
}
|
||||
@@ -234,71 +196,81 @@ export class TypesenseRepository implements ISearchRepository {
|
||||
);
|
||||
}
|
||||
|
||||
search(collection: SearchCollection.ASSETS, query: string, filter: SearchFilter): Promise<SearchResult<AssetEntity>>;
|
||||
search(collection: SearchCollection.ALBUMS, query: string, filter: SearchFilter): Promise<SearchResult<AlbumEntity>>;
|
||||
async search(collection: SearchCollection, query: string, filters: SearchFilter) {
|
||||
const alias = await this.client.aliases(collection).retrieve();
|
||||
|
||||
const { userId } = filters;
|
||||
|
||||
const _filters = [`ownerId:${userId}`];
|
||||
|
||||
if (filters.id) {
|
||||
_filters.push(`id:=${filters.id}`);
|
||||
}
|
||||
if (collection === SearchCollection.ASSETS) {
|
||||
for (const item of schemaMap[collection].fields || []) {
|
||||
let value = filters[item.name as keyof SearchFilter];
|
||||
if (Array.isArray(value)) {
|
||||
value = `[${value.join(',')}]`;
|
||||
}
|
||||
if (item.facet && value !== undefined) {
|
||||
_filters.push(`${item.name}:${value}`);
|
||||
}
|
||||
}
|
||||
|
||||
this.logger.debug(`Searching query='${query}', filters='${JSON.stringify(_filters)}'`);
|
||||
|
||||
const results = await this.client
|
||||
.collections<AssetEntity>(alias.collection_name)
|
||||
.documents()
|
||||
.search({
|
||||
q: query,
|
||||
query_by: [
|
||||
'exifInfo.imageName',
|
||||
'exifInfo.country',
|
||||
'exifInfo.state',
|
||||
'exifInfo.city',
|
||||
'exifInfo.description',
|
||||
'smartInfo.tags',
|
||||
'smartInfo.objects',
|
||||
].join(','),
|
||||
filter_by: _filters.join(' && '),
|
||||
per_page: 250,
|
||||
sort_by: filters.recent ? 'createdAt:desc' : undefined,
|
||||
facet_by: this.getFacetFieldNames(SearchCollection.ASSETS),
|
||||
});
|
||||
|
||||
return this.asResponse(results);
|
||||
}
|
||||
|
||||
if (collection === SearchCollection.ALBUMS) {
|
||||
const results = await this.client
|
||||
.collections<AlbumEntity>(alias.collection_name)
|
||||
.documents()
|
||||
.search({
|
||||
q: query,
|
||||
query_by: 'albumName',
|
||||
filter_by: _filters.join(','),
|
||||
});
|
||||
|
||||
return this.asResponse(results);
|
||||
}
|
||||
|
||||
throw new Error(`Invalid collection: ${collection}`);
|
||||
/**
 * Removes the given album ids from the albums search collection by delegating
 * to `delete`.
 *
 * @param ids album document ids to remove
 */
async deleteAlbums(ids: string[]): Promise<void> {
  const target = SearchCollection.ALBUMS;
  await this.delete(target, ids);
}
|
||||
|
||||
private asResponse<T extends DocumentSchema>(results: SearchResponse<T>): SearchResult<T> {
|
||||
/**
 * Removes the given asset ids from the assets search collection by delegating
 * to `delete`.
 *
 * @param ids asset document ids to remove
 */
async deleteAssets(ids: string[]): Promise<void> {
  const target = SearchCollection.ASSETS;
  await this.delete(target, ids);
}
|
||||
|
||||
/**
 * Deletes documents from the collection mapped to `collection`, using a
 * delete-by-query whose filter lists the given ids.
 *
 * @param collection search collection whose schema name selects the target
 * @param ids document ids to delete; an empty list is a no-op
 */
async delete(collection: SearchCollection, ids: string[]): Promise<void> {
  // Nothing to delete: avoid sending a request with the empty filter `id: []`.
  if (ids.length === 0) {
    return;
  }

  await this.client
    .collections(schemaMap[collection].name)
    .documents()
    .delete({ filter_by: `id: [${ids.join(',')}]` });
}
|
||||
|
||||
/**
 * Searches the albums collection by `albumName`.
 *
 * The collection is resolved through the albums alias, and the filter string
 * comes from `getAlbumFilters`.
 *
 * @param query text passed as the search `q`
 * @param filters search filters; `filters.debug` is forwarded to `asResponse`
 * @returns the search response converted by `asResponse`
 */
async searchAlbums(query: string, filters: SearchFilter): Promise<SearchResult<AlbumEntity>> {
  const { collection_name: collectionName } = await this.client.aliases(SearchCollection.ALBUMS).retrieve();
  const documents = this.client.collections<AlbumEntity>(collectionName).documents();

  const response = await documents.search({
    q: query,
    query_by: 'albumName',
    filter_by: this.getAlbumFilters(filters),
  });

  return this.asResponse(response, filters.debug);
}
|
||||
|
||||
/**
 * Searches the assets collection over the exif and smart-info text fields.
 *
 * The collection is resolved through the assets alias. Up to 250 hits per page
 * are requested, faceted by `getFacetFieldNames` and filtered by
 * `getAssetFilters`. When `filters.recent` is truthy the results are sorted by
 * `createdAt` descending.
 *
 * @param query text passed as the search `q`
 * @param filters search filters; `filters.debug` is forwarded to `asResponse`
 * @returns the search response converted by `asResponse`
 */
async searchAssets(query: string, filters: SearchFilter): Promise<SearchResult<AssetEntity>> {
  const queryFields = [
    'exifInfo.imageName',
    'exifInfo.country',
    'exifInfo.state',
    'exifInfo.city',
    'exifInfo.description',
    'smartInfo.tags',
    'smartInfo.objects',
  ];

  const { collection_name: collectionName } = await this.client.aliases(SearchCollection.ASSETS).retrieve();
  const documents = this.client.collections<AssetEntity>(collectionName).documents();

  const response = await documents.search({
    q: query,
    query_by: queryFields.join(','),
    per_page: 250,
    facet_by: this.getFacetFieldNames(SearchCollection.ASSETS),
    filter_by: this.getAssetFilters(filters),
    sort_by: filters.recent ? 'createdAt:desc' : undefined,
  });

  return this.asResponse(response, filters.debug);
}
|
||||
|
||||
/**
 * Runs a vector query against `smartInfo.clipEmbedding` in the assets
 * collection and returns the first (only) multi-search result.
 *
 * The request goes through `multiSearch.perform` with a single search entry:
 * `q` is `*`, `k` is 100, and the facets and filters match `searchAssets`.
 * NOTE(review): multi-search is presumably used because the serialized vector
 * is too long for a regular search request; confirm against the Typesense docs.
 *
 * @param input embedding values, joined with commas into the vector query
 * @param filters search filters; `filters.debug` is forwarded to `asResponse`
 * @returns the search response converted by `asResponse`
 */
async vectorSearch(input: number[], filters: SearchFilter): Promise<SearchResult<AssetEntity>> {
  const { collection_name: collectionName } = await this.client.aliases(SearchCollection.ASSETS).retrieve();

  const multiSearch = this.client.multiSearch;
  const response = await multiSearch.perform({
    searches: [
      {
        collection: collectionName,
        q: '*',
        vector_query: `smartInfo.clipEmbedding:([${input.join(',')}], k:100)`,
        per_page: 250,
        facet_by: this.getFacetFieldNames(SearchCollection.ASSETS),
        filter_by: this.getAssetFilters(filters),
      } as any,
    ],
  });

  return this.asResponse(response.results[0] as SearchResponse<AssetEntity>, filters.debug);
}
|
||||
|
||||
private asResponse<T extends DocumentSchema>(results: SearchResponse<T>, debug?: boolean): SearchResult<T> {
|
||||
return {
|
||||
page: results.page,
|
||||
total: results.found,
|
||||
@@ -308,51 +280,23 @@ export class TypesenseRepository implements ISearchRepository {
|
||||
counts: facet.counts.map((item) => ({ count: item.count, value: item.value })),
|
||||
fieldName: facet.field_name as string,
|
||||
})),
|
||||
};
|
||||
debug: debug ? results : undefined,
|
||||
} as SearchResult<T>;
|
||||
}
|
||||
|
||||
private async flush() {
|
||||
for (const [collection, schema] of schemas) {
|
||||
if (this.queue[collection].upsert.length > 0) {
|
||||
try {
|
||||
const items = this.queue[collection].upsert.map((item) => removeNil(item));
|
||||
this.logger.debug(`Flushing ${items.length} ${collection} upserts to typesense`);
|
||||
await this.client
|
||||
.collections(schema.name)
|
||||
.documents()
|
||||
.import(items, { action: 'upsert', dirty_values: 'coerce_or_drop' });
|
||||
this.queue[collection].upsert = [];
|
||||
} catch (error) {
|
||||
this.handleError(error);
|
||||
}
|
||||
}
|
||||
|
||||
if (this.queue[collection].delete.length > 0) {
|
||||
try {
|
||||
const items = this.queue[collection].delete;
|
||||
this.logger.debug(`Flushing ${items.length} ${collection} deletes to typesense`);
|
||||
await this.client
|
||||
.collections(schema.name)
|
||||
.documents()
|
||||
.delete({ filter_by: `id: [${items.join(',')}]` });
|
||||
this.queue[collection].delete = [];
|
||||
} catch (error) {
|
||||
this.handleError(error);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private handleError(error: any): never {
|
||||
private handleError(error: any) {
|
||||
this.logger.error('Unable to index documents');
|
||||
const results = error.importResults || [];
|
||||
for (const result of results) {
|
||||
try {
|
||||
result.document = JSON.parse(result.document);
|
||||
if (result.document?.smartInfo?.clipEmbedding) {
|
||||
result.document.smartInfo.clipEmbedding = '<truncated>';
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
|
||||
this.logger.verbose(JSON.stringify(results, null, 2));
|
||||
throw error;
|
||||
}
|
||||
|
||||
private async updateAlias(collection: SearchCollection) {
|
||||
@@ -373,6 +317,18 @@ export class TypesenseRepository implements ISearchRepository {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Prepares entities for import by running each one through the patcher for
 * its collection: `patchAsset` for the assets collection, `patchAlbum`
 * otherwise.
 *
 * @param collection decides which patcher is applied
 * @param items entities to patch
 * @returns the patched entities, in the same order
 */
private patch(collection: SearchCollection, items: AssetEntity[] | AlbumEntity[]) {
  const isAssetCollection = collection === SearchCollection.ASSETS;

  return items.map((item) => {
    if (isAssetCollection) {
      return this.patchAsset(item as AssetEntity);
    }
    return this.patchAlbum(item as AlbumEntity);
  });
}
|
||||
|
||||
/**
 * Passes the album through `removeNil` and returns the result.
 *
 * @param album album entity to clean
 * @returns the value returned by `removeNil`
 */
private patchAlbum(album: AlbumEntity): AlbumEntity {
  const cleaned = removeNil(album);
  return cleaned;
}
|
||||
|
||||
private patchAsset(asset: AssetEntity): CustomAssetEntity {
|
||||
let custom = asset as CustomAssetEntity;
|
||||
|
||||
@@ -382,9 +338,7 @@ export class TypesenseRepository implements ISearchRepository {
|
||||
custom = { ...custom, geo: [lat, lng] };
|
||||
}
|
||||
|
||||
custom = { ...custom, motion: !!asset.livePhotoVideoId };
|
||||
|
||||
return custom;
|
||||
return removeNil({ ...custom, motion: !!asset.livePhotoVideoId });
|
||||
}
|
||||
|
||||
private getFacetFieldNames(collection: SearchCollection) {
|
||||
@@ -393,4 +347,41 @@ export class TypesenseRepository implements ISearchRepository {
|
||||
.map((field) => field.name)
|
||||
.join(',');
|
||||
}
|
||||
|
||||
/**
 * Builds the `filter_by` string for album searches.
 *
 * The string always starts with an `ownerId` clause, followed by an exact `id`
 * clause when `filters.id` is truthy. After that comes one clause per faceted
 * field in `albumSchema` for which `filters` holds a defined value. Array
 * values are rendered as `[a,b,...]`. Clauses are joined with ` && `.
 *
 * @param filters search filters supplied by the caller
 * @returns the combined filter expression
 */
private getAlbumFilters(filters: SearchFilter) {
  const clauses = [`ownerId:${filters.userId}`];

  if (filters.id) {
    clauses.push(`id:=${filters.id}`);
  }

  (albumSchema.fields || []).forEach((field) => {
    const raw = filters[field.name as keyof SearchFilter];
    const rendered = Array.isArray(raw) ? `[${raw.join(',')}]` : raw;

    if (field.facet && rendered !== undefined) {
      clauses.push(`${field.name}:${rendered}`);
    }
  });

  return clauses.join(' && ');
}
|
||||
|
||||
/**
 * Builds the `filter_by` string for asset searches.
 *
 * The string always starts with an `ownerId` clause, followed by an exact `id`
 * clause when `filters.id` is truthy. After that comes one clause per faceted
 * field in `assetSchema` for which `filters` holds a defined value. Array
 * values are rendered as `[a,b,...]`. Clauses are joined with ` && `.
 *
 * @param filters search filters supplied by the caller
 * @returns the combined filter expression
 */
private getAssetFilters(filters: SearchFilter) {
  const clauses = [`ownerId:${filters.userId}`];

  if (filters.id) {
    clauses.push(`id:=${filters.id}`);
  }

  for (const { name, facet } of assetSchema.fields || []) {
    const raw = filters[name as keyof SearchFilter];
    const rendered = Array.isArray(raw) ? `[${raw.join(',')}]` : raw;

    // Only faceted fields with a defined filter value contribute a clause.
    if (!facet || rendered === undefined) {
      continue;
    }
    clauses.push(`${name}:${rendered}`);
  }

  return clauses.join(' && ');
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user