mobile bluesky app made with flutter lazurite.stormlightlabs.org/
mobile bluesky flutter
3
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: vector search (data layer)

+547 -4
+4 -4
docs/tasks/phase-7.md
··· 53 53 54 54 #### Vector Search 55 55 56 - - [ ] `SemanticSearchRepository` - depends on `EmbeddingService`, `EmbeddingRepository` 57 - - [ ] `search(query, accountDid, {source, maxResults})` - embed query, run `nearestNeighborsF32`, filter by `accountDid` and optional `source`, return `List<SemanticSearchResult>` 58 - - [ ] `SemanticSearchResult` model - `postUri`, `score` (cosine similarity as percentage), `source` (saved/liked) 59 - - [ ] Join results back to Drift `SavedPosts`/`LikedPosts` to hydrate full post JSON for display 56 + - [x] `SemanticSearchRepository` - depends on `EmbeddingService`, `EmbeddingRepository` 57 + - [x] `search(query, accountDid, {source, maxResults})` - embed query, run `nearestNeighborsF32`, filter by `accountDid` and optional `source`, return `List<SemanticSearchResult>` 58 + - [x] `SemanticSearchResult` model - `postUri`, `score` (cosine similarity as percentage), `source` (saved/liked) 59 + - [x] Join results back to Drift `SavedPosts`/`LikedPosts` to hydrate full post JSON for display 60 60 61 61 ### Cubit 62 62
+26
lib/features/search/data/embedding_repository.dart
··· 1 + import 'dart:typed_data'; 2 + 1 3 import 'package:lazurite/core/objectbox/embedded_post.dart'; 2 4 import 'package:lazurite/core/objectbox/objectbox_store.dart'; 3 5 import 'package:lazurite/objectbox.g.dart'; ··· 37 39 } 38 40 final query = builder.build(); 39 41 final results = query.find(); 42 + query.close(); 43 + return results; 44 + } 45 + 46 + /// Return the nearest-neighbour [EmbeddedPost] entries for [accountDid], 47 + /// ordered by ascending cosine distance to [queryVector]. 48 + /// 49 + /// [source] optionally filters to 'saved' or 'liked'. 50 + /// [maxResults] caps the number of candidates returned by HNSW. 51 + List<ObjectWithScore<EmbeddedPost>> nearestNeighbors( 52 + Float32List queryVector, 53 + String accountDid, { 54 + String? source, 55 + int maxResults = 20, 56 + }) { 57 + final hnswCondition = EmbeddedPost_.embedding.nearestNeighborsF32(queryVector, maxResults); 58 + final accountCondition = EmbeddedPost_.accountDid.equals(accountDid); 59 + 60 + final condition = source != null 61 + ? hnswCondition.and(accountCondition).and(EmbeddedPost_.source.equals(source)) 62 + : hnswCondition.and(accountCondition); 63 + 64 + final query = _box.query(condition).build(); 65 + final results = query.findWithScores(); 40 66 query.close(); 41 67 return results; 42 68 }
+76
lib/features/search/data/semantic_search_repository.dart
import 'package:lazurite/core/database/app_database.dart';
import 'package:lazurite/core/embedding/embedding_service.dart';
import 'package:lazurite/features/search/data/embedding_repository.dart';
import 'package:lazurite/features/search/data/semantic_search_result.dart';

/// Performs on-device semantic (vector) search over a user's saved and liked
/// posts.
///
/// Embeds the query using [EmbeddingService], asks [EmbeddingRepository] for
/// the nearest-neighbour embeddings, then joins each hit back to
/// [AppDatabase] to hydrate the full post JSON for display.
class SemanticSearchRepository {
  SemanticSearchRepository({
    required EmbeddingService embeddingService,
    required EmbeddingRepository embeddingRepository,
    required AppDatabase database,
  }) : _embeddingService = embeddingService,
       _embeddingRepository = embeddingRepository,
       _database = database;

  final EmbeddingService _embeddingService;
  final EmbeddingRepository _embeddingRepository;
  final AppDatabase _database;

  /// Searches for posts semantically similar to [query].
  ///
  /// Returns an empty list when [EmbeddingService.isAvailable] is false, when
  /// [query] is blank, or when [maxResults] is not positive.
  ///
  /// [source] narrows results to 'saved' or 'liked'; both are searched when it
  /// is null. [maxResults] is the candidate cap handed to the nearest-neighbour
  /// query (default 20). The returned list can be shorter than that: hits whose
  /// post JSON is missing from the database are dropped.
  ///
  /// Results keep the order returned by
  /// [EmbeddingRepository.nearestNeighbors].
  //
  // NOTE(review): the account/source filters are combined with the HNSW
  // condition inside the repository. If ObjectBox applies them *after* picking
  // the `maxResults` nearest candidates, a store shared by several accounts
  // can yield fewer hits than exist for this account — confirm and over-fetch
  // there if so.
  Future<List<SemanticSearchResult>> search(
    String query,
    String accountDid, {
    String? source,
    int maxResults = 20,
  }) async {
    if (!_embeddingService.isAvailable) return const [];
    if (query.trim().isEmpty) return const [];
    // Nothing can be returned for a non-positive cap; skip the embedding work.
    if (maxResults <= 0) return const [];

    final queryVector = await _embeddingService.embed(query);

    final rawResults = _embeddingRepository.nearestNeighbors(
      queryVector,
      accountDid,
      source: source,
      maxResults: maxResults,
    );

    // The per-hit lookups are independent of each other, so issue them
    // together instead of awaiting one at a time. `Future.wait` returns its
    // results in input order, which keeps the nearest-first ranking intact.
    final hydrated = await Future.wait(
      rawResults.map<Future<SemanticSearchResult?>>((hit) async {
        final post = hit.object;
        final postJson = await _fetchPostJson(accountDid, post.postUri, post.source);
        // An embedding without a matching database row cannot be displayed.
        if (postJson == null) return null;

        return SemanticSearchResult(
          postUri: post.postUri,
          score: _scorePercent(hit.score),
          source: post.source,
          postJson: postJson,
        );
      }),
    );

    return hydrated.whereType<SemanticSearchResult>().toList();
  }

  /// Converts a cosine [distance] into a similarity percentage in [0, 100].
  ///
  /// Cosine distance is in [0, 2]; similarity = 1 - distance, clamped to
  /// [0, 1] so that opposing vectors report 0% rather than a negative value.
  static double _scorePercent(double distance) => (1.0 - distance).clamp(0.0, 1.0) * 100.0;

  /// Looks up the stored post JSON for [postUri] under [accountDid].
  ///
  /// Reads from the saved-posts table when [source] is 'saved' and from the
  /// liked-posts table for any other value. Returns null when no row exists.
  Future<String?> _fetchPostJson(String accountDid, String postUri, String source) async {
    if (source == 'saved') {
      final entry = await _database.getSavedPost(accountDid, postUri);
      return entry?.postJson;
    }
    final entry = await _database.getLikedPost(accountDid, postUri);
    return entry?.postJson;
  }
}
+37
lib/features/search/data/semantic_search_result.dart
/// One hit produced by a semantic (vector) search.
///
/// Instances are immutable value objects: two results are equal when all four
/// fields are equal.
class SemanticSearchResult {
  const SemanticSearchResult({
    required this.postUri,
    required this.score,
    required this.source,
    required this.postJson,
  });

  /// AT URI of the post (e.g. at://did:plc:xxx/app.bsky.feed.post/yyy).
  final String postUri;

  /// Cosine similarity expressed as a percentage in [0, 100].
  ///
  /// Derived from the ObjectBox cosine distance: `(1 - distance) * 100`.
  final double score;

  /// Origin of the post: the user's saves ('saved') or likes ('liked').
  final String source;

  /// Raw JSON for the post, fetched from the Drift [SavedPosts] or
  /// [LikedPosts] table for display.
  final String postJson;

  @override
  int get hashCode => Object.hash(postUri, score, source, postJson);

  @override
  bool operator ==(Object other) {
    // Same instance: equal without looking at the fields.
    if (identical(this, other)) return true;
    if (other is! SemanticSearchResult) return false;
    // Require the exact same class so a subclass never equals its base.
    if (other.runtimeType != runtimeType) return false;

    return other.postUri == postUri &&
        other.score == score &&
        other.source == source &&
        other.postJson == postJson;
  }
}
+95
test/features/search/data/embedding_repository_test.dart
··· 1 + import 'dart:typed_data'; 2 + 1 3 import 'package:flutter_test/flutter_test.dart'; 2 4 import 'package:lazurite/core/objectbox/embedded_post.dart'; 3 5 import 'package:lazurite/core/objectbox/objectbox_store.dart'; ··· 9 11 ObjectBoxStore _makeInMemoryStore() { 10 12 final store = Store(getObjectBoxModel(), directory: 'memory:test-${_storeCounter++}'); 11 13 return ObjectBoxStore.forTesting(store); 14 + } 15 + 16 + /// Fixed 384-dimensional unit vector. 17 + Float32List _unitVector({double value = 1.0}) { 18 + final v = Float32List(384); 19 + for (var i = 0; i < 384; i++) { 20 + v[i] = value; 21 + } 22 + return v; 12 23 } 13 24 14 25 EmbeddedPost _post({ ··· 133 144 134 145 expect(repo.countByAccount('did:plc:a'), equals(2)); 135 146 expect(repo.countByAccount('did:plc:b'), equals(1)); 147 + }); 148 + }); 149 + 150 + group('nearestNeighbors', () { 151 + test('returns empty list when no embeddings exist', () { 152 + final results = repo.nearestNeighbors(_unitVector(), 'did:plc:a'); 153 + expect(results, isEmpty); 154 + }); 155 + 156 + test('returns posts for the matching account only', () { 157 + repo.upsert(_post(postUri: 'at://did/post/1', accountDid: 'did:plc:a', embedding: _unitVector().toList())); 158 + repo.upsert(_post(postUri: 'at://did/post/2', accountDid: 'did:plc:b', embedding: _unitVector().toList())); 159 + 160 + final results = repo.nearestNeighbors(_unitVector(), 'did:plc:a'); 161 + 162 + expect(results.length, equals(1)); 163 + expect(results.first.object.accountDid, equals('did:plc:a')); 164 + }); 165 + 166 + test('filters by source when provided', () { 167 + repo.upsert( 168 + _post( 169 + postUri: 'at://did/post/1', 170 + accountDid: 'did:plc:a', 171 + source: 'saved', 172 + embedding: _unitVector().toList(), 173 + ), 174 + ); 175 + repo.upsert( 176 + _post( 177 + postUri: 'at://did/post/2', 178 + accountDid: 'did:plc:a', 179 + source: 'liked', 180 + embedding: _unitVector().toList(), 181 + ), 182 + ); 183 + 184 + final savedOnly = 
repo.nearestNeighbors(_unitVector(), 'did:plc:a', source: 'saved'); 185 + expect(savedOnly.length, equals(1)); 186 + expect(savedOnly.first.object.source, equals('saved')); 187 + 188 + final likedOnly = repo.nearestNeighbors(_unitVector(), 'did:plc:a', source: 'liked'); 189 + expect(likedOnly.length, equals(1)); 190 + expect(likedOnly.first.object.source, equals('liked')); 191 + }); 192 + 193 + test('returns all sources when source is not provided', () { 194 + repo.upsert( 195 + _post( 196 + postUri: 'at://did/post/1', 197 + accountDid: 'did:plc:a', 198 + source: 'saved', 199 + embedding: _unitVector().toList(), 200 + ), 201 + ); 202 + repo.upsert( 203 + _post( 204 + postUri: 'at://did/post/2', 205 + accountDid: 'did:plc:a', 206 + source: 'liked', 207 + embedding: _unitVector().toList(), 208 + ), 209 + ); 210 + 211 + final results = repo.nearestNeighbors(_unitVector(), 'did:plc:a'); 212 + expect(results.length, equals(2)); 213 + }); 214 + 215 + test('respects maxResults cap', () { 216 + for (var i = 1; i <= 5; i++) { 217 + repo.upsert(_post(postUri: 'at://did/post/$i', accountDid: 'did:plc:a', embedding: _unitVector().toList())); 218 + } 219 + 220 + final results = repo.nearestNeighbors(_unitVector(), 'did:plc:a', maxResults: 3); 221 + expect(results.length, lessThanOrEqualTo(3)); 222 + }); 223 + 224 + test('scores are non-negative (cosine distance in [0, 2])', () { 225 + repo.upsert(_post(postUri: 'at://did/post/1', accountDid: 'did:plc:a', embedding: _unitVector().toList())); 226 + 227 + final results = repo.nearestNeighbors(_unitVector(), 'did:plc:a'); 228 + expect(results, isNotEmpty); 229 + expect(results.first.score, greaterThanOrEqualTo(0.0)); 230 + expect(results.first.score, lessThanOrEqualTo(2.0)); 136 231 }); 137 232 }); 138 233 });
+309
test/features/search/data/semantic_search_repository_test.dart
import 'dart:convert';
import 'dart:math' as math;
import 'dart:typed_data';

import 'package:drift/drift.dart' show Value;
import 'package:drift/native.dart';
import 'package:flutter_test/flutter_test.dart';
import 'package:lazurite/core/database/app_database.dart';
import 'package:lazurite/core/embedding/embedding_service.dart';
import 'package:lazurite/core/objectbox/embedded_post.dart';
import 'package:lazurite/core/objectbox/objectbox_store.dart';
import 'package:lazurite/features/search/data/embedding_repository.dart';
import 'package:lazurite/features/search/data/semantic_search_repository.dart';
import 'package:lazurite/objectbox.g.dart';

/// Number of components in every embedding used by these tests.
const _dimensions = 384;

var _storeCounter = 0;

/// Opens a fresh in-memory ObjectBox store; the counter keeps each test's
/// store name distinct.
ObjectBoxStore _makeInMemoryStore() {
  final store = Store(getObjectBoxModel(), directory: 'memory:search-repo-${_storeCounter++}');
  return ObjectBoxStore.forTesting(store);
}

/// Returns a [_dimensions]-component vector whose L2 norm is 1.
///
/// Every component is `1 / sqrt(_dimensions)`, so the squared components sum
/// to exactly one (up to float32 rounding).
Float32List _unitVector() {
  final component = 1.0 / math.sqrt(_dimensions);
  return Float32List(_dimensions)..fillRange(0, _dimensions, component);
}

/// A service that embeds every input as [_unitVector].
EmbeddingService _availableService() => EmbeddingService.forTesting((_) async => _unitVector());

/// A default-constructed service that is never initialised.
EmbeddingService _unavailableService() => EmbeddingService();

/// Post JSON in the flat shape stored for saved posts.
String _savedPostJson(String uri, String text) => jsonEncode({
  'uri': uri,
  'cid': 'bafycid1',
  'author': {'did': 'did:plc:author', 'handle': 'author.bsky.social'},
  'record': {'\$type': 'app.bsky.feed.post', 'text': text, 'createdAt': '2024-01-01T00:00:00.000Z'},
  'replyCount': 0,
  'repostCount': 0,
  'likeCount': 0,
  'quoteCount': 0,
  'indexedAt': '2024-01-01T00:00:00.000Z',
});

/// Post JSON in the shape stored for liked posts (wrapped in a 'post' key).
String _likedPostJson(String uri, String text) => jsonEncode({
  'post': {
    'uri': uri,
    'cid': 'bafycid2',
    'author': {'did': 'did:plc:author', 'handle': 'author.bsky.social'},
    'record': {'\$type': 'app.bsky.feed.post', 'text': text, 'createdAt': '2024-01-01T00:00:00.000Z'},
    'replyCount': 0,
    'repostCount': 0,
    'likeCount': 0,
    'quoteCount': 0,
    'indexedAt': '2024-01-01T00:00:00.000Z',
  },
});

void main() {
  late AppDatabase database;
  late ObjectBoxStore objectBoxStore;
  late EmbeddingRepository embeddingRepo;
  late EmbeddingService embeddingService;

  setUp(() async {
    database = AppDatabase(executor: NativeDatabase.memory());
    objectBoxStore = _makeInMemoryStore();
    embeddingRepo = EmbeddingRepository(objectBoxStore);
    embeddingService = _availableService();
    await embeddingService.initialize();
  });

  tearDown(() async {
    await database.close();
    objectBoxStore.close();
  });

  SemanticSearchRepository makeRepo({EmbeddingService? service}) => SemanticSearchRepository(
    embeddingService: service ?? embeddingService,
    embeddingRepository: embeddingRepo,
    database: database,
  );

  /// Stores only the ObjectBox embedding row for a post (no Drift row).
  void indexEmbedding(
    String postUri,
    String accountDid, {
    required String source,
    String text = 'post text',
    List<double>? embedding,
  }) {
    embeddingRepo.upsert(
      EmbeddedPost(
        postUri: postUri,
        accountDid: accountDid,
        source: source,
        indexedText: text,
        embedding: embedding ?? _unitVector().toList(),
        embeddedAt: DateTime(2026, 1, 1),
      ),
    );
  }

  /// Inserts an EmbeddedPost in ObjectBox and the corresponding raw post JSON
  /// in the Drift SavedPosts table.
  Future<void> insertSavedPost(
    String postUri,
    String accountDid, {
    String text = 'post text',
    List<double>? embedding,
  }) async {
    indexEmbedding(postUri, accountDid, source: 'saved', text: text, embedding: embedding);
    await database.savePost(
      SavedPostsCompanion(
        accountDid: Value(accountDid),
        postUri: Value(postUri),
        postJson: Value(_savedPostJson(postUri, text)),
      ),
    );
  }

  /// Inserts an EmbeddedPost in ObjectBox and the corresponding raw post JSON
  /// in the Drift LikedPosts table.
  Future<void> insertLikedPost(
    String postUri,
    String accountDid, {
    String text = 'post text',
    List<double>? embedding,
  }) async {
    indexEmbedding(postUri, accountDid, source: 'liked', text: text, embedding: embedding);
    await database.upsertLikedPost(
      LikedPostsCompanion(
        accountDid: Value(accountDid),
        postUri: Value(postUri),
        postJson: Value(_likedPostJson(postUri, text)),
      ),
    );
  }

  group('SemanticSearchRepository', () {
    group('search', () {
      test('returns empty list when EmbeddingService is unavailable', () async {
        await insertSavedPost('at://did/post/1', 'did:plc:user');

        final repo = makeRepo(service: _unavailableService());
        final results = await repo.search('hello', 'did:plc:user');

        expect(results, isEmpty);
      });

      test('returns empty list when query is empty', () async {
        await insertSavedPost('at://did/post/1', 'did:plc:user');

        final results = await makeRepo().search('', 'did:plc:user');

        expect(results, isEmpty);
      });

      test('returns empty list when query is whitespace only', () async {
        await insertSavedPost('at://did/post/1', 'did:plc:user');

        final results = await makeRepo().search(' ', 'did:plc:user');

        expect(results, isEmpty);
      });

      test('returns empty list when no posts are indexed', () async {
        final results = await makeRepo().search('query', 'did:plc:user');

        expect(results, isEmpty);
      });

      test('returns SemanticSearchResult with correct postUri and source', () async {
        await insertSavedPost('at://did/post/1', 'did:plc:user', text: 'interesting article');

        final results = await makeRepo().search('interesting', 'did:plc:user');

        expect(results, hasLength(1));
        expect(results.first.postUri, equals('at://did/post/1'));
        expect(results.first.source, equals('saved'));
      });

      test('result contains postJson from Drift SavedPosts', () async {
        await insertSavedPost('at://did/post/1', 'did:plc:user', text: 'test post');

        final results = await makeRepo().search('test', 'did:plc:user');

        expect(results, hasLength(1));
        final decoded = jsonDecode(results.first.postJson) as Map<String, dynamic>;
        expect(decoded['uri'], equals('at://did/post/1'));
      });

      test('result contains postJson from Drift LikedPosts', () async {
        await insertLikedPost('at://did/post/2', 'did:plc:user', text: 'liked content');

        final results = await makeRepo().search('content', 'did:plc:user');

        expect(results, hasLength(1));
        expect(results.first.source, equals('liked'));
        final decoded = jsonDecode(results.first.postJson) as Map<String, dynamic>;
        // liked post JSON has a nested 'post' key
        expect((decoded['post'] as Map<String, dynamic>)['uri'], equals('at://did/post/2'));
      });

      test('score is in the range [0, 100]', () async {
        await insertSavedPost('at://did/post/1', 'did:plc:user');

        final results = await makeRepo().search('hello', 'did:plc:user');

        expect(results, isNotEmpty);
        expect(results.first.score, inInclusiveRange(0.0, 100.0));
      });

      test('identical-vector query produces near-100% score', () async {
        // The default test service embeds every query as the same unit vector
        // the post was indexed with, so the cosine distance should be ~0.
        await insertSavedPost('at://did/post/1', 'did:plc:user');

        final results = await makeRepo().search('hello', 'did:plc:user');

        expect(results, isNotEmpty);
        // Allow a small tolerance for float32 rounding in the distance.
        expect(results.first.score, closeTo(100.0, 1.0));
      });

      test('filters results to the searched accountDid only', () async {
        await insertSavedPost('at://did/post/1', 'did:plc:user-a');
        await insertSavedPost('at://did/post/2', 'did:plc:user-b');

        final results = await makeRepo().search('hello', 'did:plc:user-a');

        expect(results, hasLength(1));
        expect(results.first.postUri, equals('at://did/post/1'));
      });

      test('returns both saved and liked posts by default', () async {
        await insertSavedPost('at://did/post/saved', 'did:plc:user');
        await insertLikedPost('at://did/post/liked', 'did:plc:user');

        final results = await makeRepo().search('hello', 'did:plc:user');

        expect(results, hasLength(2));
        final sources = results.map((r) => r.source).toSet();
        expect(sources, containsAll(['saved', 'liked']));
      });

      test('filters to saved posts when source is "saved"', () async {
        await insertSavedPost('at://did/post/saved', 'did:plc:user');
        await insertLikedPost('at://did/post/liked', 'did:plc:user');

        final results = await makeRepo().search('hello', 'did:plc:user', source: 'saved');

        expect(results, hasLength(1));
        expect(results.first.source, equals('saved'));
        expect(results.first.postUri, equals('at://did/post/saved'));
      });

      test('filters to liked posts when source is "liked"', () async {
        await insertSavedPost('at://did/post/saved', 'did:plc:user');
        await insertLikedPost('at://did/post/liked', 'did:plc:user');

        final results = await makeRepo().search('hello', 'did:plc:user', source: 'liked');

        expect(results, hasLength(1));
        expect(results.first.source, equals('liked'));
        expect(results.first.postUri, equals('at://did/post/liked'));
      });

      test('skips results whose postJson cannot be found in Drift', () async {
        // Insert embedding in ObjectBox but NOT in Drift.
        indexEmbedding('at://did/post/orphan', 'did:plc:user', source: 'saved', text: 'orphaned post');

        final results = await makeRepo().search('hello', 'did:plc:user');

        // The orphaned result is silently skipped.
        expect(results, isEmpty);
      });

      test('respects maxResults limit', () async {
        for (var i = 1; i <= 10; i++) {
          await insertSavedPost('at://did/post/$i', 'did:plc:user');
        }

        final results = await makeRepo().search('hello', 'did:plc:user', maxResults: 3);

        expect(results.length, lessThanOrEqualTo(3));
      });
    });
  });
}