Another remote for gh:zotero-rag/zotero-rag
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix: handle unordered columns in lancedb return value

authored by

Rahul Yedida and committed by
Rahul Yedida
8b05a7b9 430e46e9

+30 -23
+30 -23
zqa-rag/src/vector/backends/lance.rs
···
348 348               ))
349 349           })?;
350 350
351     -         if let Some((by_idx, _)) = self.schema.column_with_name(by)
352     -             && let Some((key_idx, _)) = self.schema.column_with_name(key)
    351 +         if self.schema.column_with_name(by).is_none() || self.schema.column_with_name(key).is_none()
353 352           {
354     -             let mut stream = table.query().execute().await?;
355     -             let mut seen_by_values = HashSet::new();
356     -             let mut duplicate_keys = Vec::new();
    353 +             return Err(LanceError::ParameterError(format!(
    354 +                 "column '{by}' or '{key}' not found in schema"
    355 +             )));
    356 +         }
    357 +
    358 +         let mut stream = table.query().execute().await?;
    359 +         let mut seen_by_values = HashSet::new();
    360 +         let mut duplicate_keys = Vec::new();
    361 +
    362 +         while let Some(batch) = stream.try_next().await? {
    363 +             let batch_schema = batch.schema();
    364 +             let Some((by_idx, _)) = batch_schema.column_with_name(by) else {
    365 +                 continue;
    366 +             };
    367 +             let Some((key_idx, _)) = batch_schema.column_with_name(key) else {
    368 +                 continue;
    369 +             };
357 370
358     -             while let Some(batch) = stream.try_next().await? {
359     -                 let by_values = get_column_from_batch(&batch, by_idx);
360     -                 let key_values = get_column_from_batch(&batch, key_idx);
    371 +             let by_values = get_column_from_batch(&batch, by_idx);
    372 +             let key_values = get_column_from_batch(&batch, key_idx);
361 373
362     -                 for (by_val, key_val) in by_values.into_iter().zip(key_values) {
363     -                     if !seen_by_values.insert(by_val) {
364     -                         duplicate_keys.push(key_val);
365     -                     }
    374 +             for (by_val, key_val) in by_values.into_iter().zip(key_values) {
    375 +                 if !seen_by_values.insert(by_val) {
    376 +                     duplicate_keys.push(key_val);
366 377                   }
367 378               }
368     -
369     -             // Delete duplicate rows
370     -             let deleted_count = duplicate_keys.len();
371     -             if !duplicate_keys.is_empty() {
372     -                 self.delete_rows(key, &duplicate_keys).await?;
373     -             }
    379 +         }
374 380
375     -             Ok(deleted_count)
376     -         } else {
377     -             Err(LanceError::ParameterError(format!(
378     -                 "column '{by}' or '{key}' not found in schema"
379     -             )))
    381 +         // Delete duplicate rows
    382 +         let deleted_count = duplicate_keys.len();
    383 +         if !duplicate_keys.is_empty() {
    384 +             self.delete_rows(key, &duplicate_keys).await?;
380 385           }
    386 +
    387 +         Ok(deleted_count)
381 388       }
382 389
383 390       /// Return all the rows in the LanceDB table, selecting only the columns specified. This is useful