this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

bastard

+5068 -69
+117 -1
src/core/graph.rs
··· 1 1 use dashmap::DashMap; 2 2 use std::sync::Arc; 3 + use std::collections::HashMap; 3 4 use parking_lot::RwLock; 4 5 use crate::{ 5 6 NodeId, RelationshipId, Node, Relationship, 6 7 Result, GigabrainError 7 8 }; 8 - use crate::core::{relationship::Direction, GraphSchema}; 9 + use crate::core::{relationship::Direction, GraphSchema, PropertyValue}; 10 + use crate::index::{IndexManager, IndexType, PersistentIndexManager}; 11 + use crate::storage::StorageEngine; 9 12 10 13 pub struct Graph { 11 14 nodes: Arc<DashMap<NodeId, Node>>, 12 15 relationships: Arc<DashMap<RelationshipId, Relationship>>, 13 16 node_relationships: Arc<DashMap<NodeId, Vec<RelationshipId>>>, 14 17 schema: Arc<RwLock<GraphSchema>>, 18 + index_manager: Arc<IndexManager>, 19 + persistent_index_manager: Option<Arc<PersistentIndexManager>>, 15 20 16 21 next_node_id: Arc<RwLock<u64>>, 17 22 next_relationship_id: Arc<RwLock<u64>>, ··· 24 29 relationships: Arc::new(DashMap::new()), 25 30 node_relationships: Arc::new(DashMap::new()), 26 31 schema: Arc::new(RwLock::new(GraphSchema::new())), 32 + index_manager: Arc::new(IndexManager::new()), 33 + persistent_index_manager: None, 34 + next_node_id: Arc::new(RwLock::new(0)), 35 + next_relationship_id: Arc::new(RwLock::new(0)), 36 + } 37 + } 38 + 39 + /// Create a new graph with persistent storage for indexes 40 + pub fn with_persistent_indexes(storage: Arc<dyn StorageEngine>) -> Self { 41 + let memory_index_manager = Arc::new(IndexManager::new()); 42 + let persistent_index_manager = Arc::new(PersistentIndexManager::with_memory_manager( 43 + memory_index_manager.clone(), 44 + storage 45 + )); 46 + 47 + Self { 48 + nodes: Arc::new(DashMap::new()), 49 + relationships: Arc::new(DashMap::new()), 50 + node_relationships: Arc::new(DashMap::new()), 51 + schema: Arc::new(RwLock::new(GraphSchema::new())), 52 + index_manager: memory_index_manager, 53 + persistent_index_manager: Some(persistent_index_manager), 27 54 next_node_id: Arc::new(RwLock::new(0)), 28 55 
next_relationship_id: Arc::new(RwLock::new(0)), 29 56 } ··· 46 73 F: FnOnce(&mut Node), 47 74 { 48 75 if let Some(mut node) = self.nodes.get_mut(&id) { 76 + // Store old state for index updates 77 + let old_labels: Vec<_> = node.labels.iter().cloned().collect(); 78 + let old_properties = node.properties.clone(); 79 + 49 80 update_fn(&mut node); 81 + 82 + // Update indexes with new node state 83 + let new_labels: Vec<_> = node.labels.iter().cloned().collect(); 84 + let new_properties = node.properties.clone(); 85 + 86 + // Ensure indexes exist for new labels 87 + for &label_id in &new_labels { 88 + if !old_labels.contains(&label_id) { 89 + let index_type = IndexType::Label(label_id); 90 + if let Err(e) = self.index_manager.create_index(index_type, None, false) { 91 + tracing::debug!("Label index may already exist: {}", e); 92 + } 93 + } 94 + } 95 + 96 + // Ensure indexes exist for new properties 97 + for &property_key_id in new_properties.keys() { 98 + if !old_properties.contains_key(&property_key_id) { 99 + let index_type = IndexType::Property(property_key_id); 100 + if let Err(e) = self.index_manager.create_index(index_type, None, false) { 101 + tracing::debug!("Property index may already exist: {}", e); 102 + } 103 + } 104 + } 105 + 106 + // Update index manager 107 + if let Err(e) = self.index_manager.update_node_properties(id, &old_properties, &new_properties) { 108 + tracing::warn!("Failed to update property indexes: {}", e); 109 + } 110 + 111 + // Add node to indexes with new labels and properties 112 + if let Err(e) = self.index_manager.add_node(id, &new_labels, &new_properties) { 113 + tracing::warn!("Failed to add node to indexes: {}", e); 114 + } 115 + 50 116 Ok(()) 51 117 } else { 52 118 Err(GigabrainError::NodeNotFound(id)) ··· 132 198 } 133 199 } 134 200 201 + // Remove from indexes 202 + if let Err(e) = self.index_manager.remove_node(id) { 203 + tracing::warn!("Failed to remove node from indexes: {}", e); 204 + } 205 + 135 206 self.nodes 136 207 
.remove(&id) 137 208 .ok_or(GigabrainError::NodeNotFound(id))?; ··· 163 234 &self.schema 164 235 } 165 236 237 + pub fn index_manager(&self) -> &Arc<IndexManager> { 238 + &self.index_manager 239 + } 240 + 241 + /// Get the persistent index manager if available 242 + pub fn persistent_index_manager(&self) -> Option<&Arc<PersistentIndexManager>> { 243 + self.persistent_index_manager.as_ref() 244 + } 245 + 246 + /// Flush indexes to persistent storage if available 247 + pub async fn flush_indexes(&self) -> Result<()> { 248 + if let Some(persistent_manager) = &self.persistent_index_manager { 249 + persistent_manager.flush().await?; 250 + } 251 + Ok(()) 252 + } 253 + 254 + /// Load indexes from storage if persistent manager is available 255 + pub async fn load_indexes(&self) -> Result<()> { 256 + if let Some(persistent_manager) = &self.persistent_index_manager { 257 + persistent_manager.ensure_loaded().await?; 258 + } 259 + Ok(()) 260 + } 261 + 166 262 /// Get all node IDs in the graph 167 263 pub fn get_all_nodes(&self) -> Vec<NodeId> { 168 264 self.nodes.iter().map(|entry| *entry.key()).collect() 265 + } 266 + 267 + /// Find nodes by label using indexes when available 268 + pub fn find_nodes_by_label(&self, label_name: &str) -> Result<Vec<NodeId>> { 269 + let schema = self.schema.read(); 270 + if let Some(&label_id) = schema.labels.get(label_name) { 271 + self.index_manager.get_nodes_by_label(label_id) 272 + } else { 273 + Ok(Vec::new()) 274 + } 275 + } 276 + 277 + /// Find nodes by property value using indexes when available 278 + pub fn find_nodes_by_property(&self, property_name: &str, value: &PropertyValue) -> Result<Vec<NodeId>> { 279 + let schema = self.schema.read(); 280 + if let Some(&property_key_id) = schema.property_keys.get(property_name) { 281 + self.index_manager.get_nodes_by_property(property_key_id, value) 282 + } else { 283 + Ok(Vec::new()) 284 + } 169 285 } 170 286 }
+2
src/core/mod.rs
··· 3 3 use crate::{NodeId, RelationshipId, LabelId, PropertyKeyId, Result}; 4 4 5 5 pub mod graph; 6 + pub mod persistent_graph; 6 7 pub mod node; 7 8 pub mod relationship; 8 9 pub mod property; 9 10 pub mod schema_validation; 10 11 11 12 pub use graph::Graph; 13 + pub use persistent_graph::PersistentGraph; 12 14 pub use node::Node; 13 15 pub use relationship::Relationship; 14 16 pub use property::{Property, PropertyValue};
+479
src/core/persistent_graph.rs
··· 1 + use dashmap::DashMap; 2 + use std::sync::Arc; 3 + use parking_lot::RwLock; 4 + use crate::{ 5 + NodeId, RelationshipId, Node, Relationship, 6 + Result, GigabrainError 7 + }; 8 + use crate::core::{relationship::Direction, GraphSchema}; 9 + use crate::persistence::{PersistentStorage, StorageBackend}; 10 + 11 + /// A graph implementation that persists data to storage 12 + pub struct PersistentGraph { 13 + // In-memory cache for fast access 14 + nodes: Arc<DashMap<NodeId, Node>>, 15 + relationships: Arc<DashMap<RelationshipId, Relationship>>, 16 + node_relationships: Arc<DashMap<NodeId, Vec<RelationshipId>>>, 17 + schema: Arc<RwLock<GraphSchema>>, 18 + 19 + // ID generators 20 + next_node_id: Arc<RwLock<u64>>, 21 + next_relationship_id: Arc<RwLock<u64>>, 22 + 23 + // Persistent storage 24 + storage: Arc<PersistentStorage>, 25 + 26 + // Flag to enable/disable write-through caching 27 + write_through: bool, 28 + } 29 + 30 + impl PersistentGraph { 31 + /// Create a new persistent graph with the given storage backend 32 + pub async fn new(storage_backend: Arc<dyn StorageBackend>) -> Result<Self> { 33 + let storage = Arc::new(PersistentStorage::new(storage_backend)); 34 + storage.initialize().await?; 35 + 36 + let mut graph = Self { 37 + nodes: Arc::new(DashMap::new()), 38 + relationships: Arc::new(DashMap::new()), 39 + node_relationships: Arc::new(DashMap::new()), 40 + schema: Arc::new(RwLock::new(GraphSchema::new())), 41 + next_node_id: Arc::new(RwLock::new(0)), 42 + next_relationship_id: Arc::new(RwLock::new(0)), 43 + storage, 44 + write_through: true, 45 + }; 46 + 47 + // Load existing data from storage 48 + graph.load_from_storage().await?; 49 + 50 + Ok(graph) 51 + } 52 + 53 + /// Load data from persistent storage into memory cache 54 + async fn load_from_storage(&mut self) -> Result<()> { 55 + tracing::info!("Loading graph data from storage..."); 56 + 57 + // Load schema 58 + if let Some(schema) = self.storage.get_schema().await? 
{ 59 + *self.schema.write() = schema; 60 + } 61 + 62 + // Load node and relationship counters 63 + if let Some(node_counter) = self.storage.get_counter("node_counter").await? { 64 + *self.next_node_id.write() = node_counter; 65 + } 66 + 67 + if let Some(rel_counter) = self.storage.get_counter("relationship_counter").await? { 68 + *self.next_relationship_id.write() = rel_counter; 69 + } 70 + 71 + // Load all nodes 72 + let node_ids = self.storage.get_all_node_ids().await?; 73 + for node_id in node_ids { 74 + if let Some(node) = self.storage.get_node(node_id).await? { 75 + self.nodes.insert(node_id, node); 76 + self.node_relationships.insert(node_id, Vec::new()); 77 + } 78 + } 79 + 80 + // Load all relationships and build relationship index 81 + let relationship_ids = self.storage.get_all_relationship_ids().await?; 82 + for rel_id in relationship_ids { 83 + if let Some(relationship) = self.storage.get_relationship(rel_id).await? { 84 + let start_node = relationship.start_node; 85 + let end_node = relationship.end_node; 86 + 87 + self.relationships.insert(rel_id, relationship); 88 + 89 + // Update relationship indices 90 + self.node_relationships.entry(start_node).and_modify(|rels| rels.push(rel_id)); 91 + if start_node != end_node { 92 + self.node_relationships.entry(end_node).and_modify(|rels| rels.push(rel_id)); 93 + } 94 + } 95 + } 96 + 97 + tracing::info!( 98 + "Loaded {} nodes and {} relationships from storage", 99 + self.nodes.len(), 100 + self.relationships.len() 101 + ); 102 + 103 + Ok(()) 104 + } 105 + 106 + /// Persist current state to storage 107 + pub async fn persist_to_storage(&self) -> Result<()> { 108 + tracing::info!("Persisting graph data to storage..."); 109 + 110 + // Persist schema 111 + self.storage.store_schema(&self.schema.read()).await?; 112 + 113 + // Persist counters 114 + self.storage.store_counter("node_counter", *self.next_node_id.read()).await?; 115 + self.storage.store_counter("relationship_counter", 
*self.next_relationship_id.read()).await?; 116 + 117 + // Persist all nodes 118 + for entry in self.nodes.iter() { 119 + let (_node_id, node) = entry.pair(); 120 + self.storage.store_node(node).await?; 121 + } 122 + 123 + // Persist all relationships 124 + for entry in self.relationships.iter() { 125 + let (_rel_id, relationship) = entry.pair(); 126 + self.storage.store_relationship(relationship).await?; 127 + } 128 + 129 + // Flush to ensure data is written 130 + self.storage.flush().await?; 131 + 132 + tracing::info!("Graph data persisted to storage successfully"); 133 + Ok(()) 134 + } 135 + 136 + pub async fn create_node(&self) -> Result<NodeId> { 137 + let mut id_gen = self.next_node_id.write(); 138 + let id = NodeId(*id_gen); 139 + *id_gen += 1; 140 + drop(id_gen); 141 + 142 + let node = Node::new(id); 143 + 144 + // Store in cache 145 + self.nodes.insert(id, node.clone()); 146 + self.node_relationships.insert(id, Vec::new()); 147 + 148 + // Persist to storage if write-through is enabled 149 + if self.write_through { 150 + self.storage.store_node(&node).await?; 151 + self.storage.store_counter("node_counter", *self.next_node_id.read()).await?; 152 + } 153 + 154 + Ok(id) 155 + } 156 + 157 + pub async fn update_node<F>(&self, id: NodeId, update_fn: F) -> Result<()> 158 + where 159 + F: FnOnce(&mut Node), 160 + { 161 + if let Some(mut node) = self.nodes.get_mut(&id) { 162 + update_fn(&mut node); 163 + 164 + // Persist to storage if write-through is enabled 165 + if self.write_through { 166 + self.storage.store_node(&node).await?; 167 + } 168 + 169 + Ok(()) 170 + } else { 171 + Err(GigabrainError::NodeNotFound(id)) 172 + } 173 + } 174 + 175 + pub async fn create_relationship( 176 + &self, 177 + start: NodeId, 178 + end: NodeId, 179 + rel_type: u32, 180 + ) -> Result<RelationshipId> { 181 + if !self.nodes.contains_key(&start) { 182 + return Err(GigabrainError::NodeNotFound(start)); 183 + } 184 + if !self.nodes.contains_key(&end) { 185 + return 
Err(GigabrainError::NodeNotFound(end)); 186 + } 187 + 188 + let mut id_gen = self.next_relationship_id.write(); 189 + let id = RelationshipId(*id_gen); 190 + *id_gen += 1; 191 + drop(id_gen); 192 + 193 + let relationship = Relationship::new(id, start, end, rel_type); 194 + 195 + // Store in cache 196 + self.relationships.insert(id, relationship.clone()); 197 + 198 + self.node_relationships.entry(start).and_modify(|rels| rels.push(id)); 199 + if start != end { 200 + self.node_relationships.entry(end).and_modify(|rels| rels.push(id)); 201 + } 202 + 203 + // Persist to storage if write-through is enabled 204 + if self.write_through { 205 + self.storage.store_relationship(&relationship).await?; 206 + self.storage.store_counter("relationship_counter", *self.next_relationship_id.read()).await?; 207 + } 208 + 209 + Ok(id) 210 + } 211 + 212 + pub async fn get_node(&self, id: NodeId) -> Option<Node> { 213 + // Try cache first 214 + if let Some(node) = self.nodes.get(&id) { 215 + return Some(node.clone()); 216 + } 217 + 218 + // If not in cache, try loading from storage 219 + if let Ok(Some(node)) = self.storage.get_node(id).await { 220 + // Cache the loaded node 221 + self.nodes.insert(id, node.clone()); 222 + Some(node) 223 + } else { 224 + None 225 + } 226 + } 227 + 228 + pub async fn get_relationship(&self, id: RelationshipId) -> Option<Relationship> { 229 + // Try cache first 230 + if let Some(rel) = self.relationships.get(&id) { 231 + return Some(rel.clone()); 232 + } 233 + 234 + // If not in cache, try loading from storage 235 + if let Ok(Some(relationship)) = self.storage.get_relationship(id).await { 236 + // Cache the loaded relationship 237 + self.relationships.insert(id, relationship.clone()); 238 + Some(relationship) 239 + } else { 240 + None 241 + } 242 + } 243 + 244 + pub fn get_node_relationships( 245 + &self, 246 + node_id: NodeId, 247 + direction: Direction, 248 + rel_types: Option<&[u32]>, 249 + ) -> Vec<Relationship> { 250 + let rel_ids = match 
self.node_relationships.get(&node_id) { 251 + Some(ids) => ids.clone(), 252 + None => return Vec::new(), 253 + }; 254 + 255 + rel_ids 256 + .into_iter() 257 + .filter_map(|rel_id| { 258 + self.relationships.get(&rel_id).and_then(|rel| { 259 + let matches_direction = match direction { 260 + Direction::Outgoing => rel.start_node == node_id, 261 + Direction::Incoming => rel.end_node == node_id, 262 + Direction::Both => true, 263 + }; 264 + 265 + let matches_type = rel_types 266 + .map(|types| types.contains(&rel.rel_type)) 267 + .unwrap_or(true); 268 + 269 + if matches_direction && matches_type { 270 + Some(rel.clone()) 271 + } else { 272 + None 273 + } 274 + }) 275 + }) 276 + .collect() 277 + } 278 + 279 + pub async fn delete_node(&self, id: NodeId) -> Result<()> { 280 + // Delete all relationships connected to this node 281 + if let Some((_, rel_ids)) = self.node_relationships.remove(&id) { 282 + for rel_id in rel_ids { 283 + self.delete_relationship(rel_id).await?; 284 + } 285 + } 286 + 287 + // Remove from cache 288 + self.nodes 289 + .remove(&id) 290 + .ok_or(GigabrainError::NodeNotFound(id))?; 291 + 292 + // Remove from storage if write-through is enabled 293 + if self.write_through { 294 + self.storage.delete_node(id).await?; 295 + } 296 + 297 + Ok(()) 298 + } 299 + 300 + pub async fn delete_relationship(&self, id: RelationshipId) -> Result<()> { 301 + // Remove from cache 302 + let rel = self 303 + .relationships 304 + .remove(&id) 305 + .ok_or(GigabrainError::RelationshipNotFound(id))? 
306 + .1; 307 + 308 + // Update relationship indices 309 + self.node_relationships.entry(rel.start_node).and_modify(|rels| { 310 + rels.retain(|&r| r != id); 311 + }); 312 + 313 + if rel.start_node != rel.end_node { 314 + self.node_relationships.entry(rel.end_node).and_modify(|rels| { 315 + rels.retain(|&r| r != id); 316 + }); 317 + } 318 + 319 + // Remove from storage if write-through is enabled 320 + if self.write_through { 321 + self.storage.delete_relationship(id).await?; 322 + } 323 + 324 + Ok(()) 325 + } 326 + 327 + pub fn schema(&self) -> &Arc<RwLock<GraphSchema>> { 328 + &self.schema 329 + } 330 + 331 + /// Get all node IDs in the graph 332 + pub fn get_all_nodes(&self) -> Vec<NodeId> { 333 + self.nodes.iter().map(|entry| *entry.key()).collect() 334 + } 335 + 336 + /// Enable or disable write-through caching 337 + pub fn set_write_through(&mut self, enabled: bool) { 338 + self.write_through = enabled; 339 + } 340 + 341 + /// Flush all pending changes to storage 342 + pub async fn flush(&self) -> Result<()> { 343 + self.storage.flush().await 344 + } 345 + 346 + /// Close the persistent graph and storage 347 + pub async fn close(self) -> Result<()> { 348 + // Ensure all data is persisted before closing 349 + if self.write_through { 350 + self.persist_to_storage().await?; 351 + } 352 + 353 + self.storage.close().await 354 + } 355 + } 356 + 357 + // Implement the same interface as the original Graph for compatibility 358 + impl PersistentGraph { 359 + /// Legacy sync method for create_node (for compatibility) 360 + pub fn create_node_sync(&self) -> NodeId { 361 + // For backward compatibility, use blocking async 362 + tokio::task::block_in_place(|| { 363 + tokio::runtime::Handle::current().block_on(async { 364 + self.create_node().await.expect("Failed to create node") 365 + }) 366 + }) 367 + } 368 + 369 + /// Legacy sync method for update_node (for compatibility) 370 + pub fn update_node_sync<F>(&self, id: NodeId, update_fn: F) -> Result<()> 371 + where 372 + 
F: FnOnce(&mut Node), 373 + { 374 + // For backward compatibility, use blocking async 375 + tokio::task::block_in_place(|| { 376 + tokio::runtime::Handle::current().block_on(async { 377 + self.update_node(id, update_fn).await 378 + }) 379 + }) 380 + } 381 + 382 + /// Legacy sync method for create_relationship (for compatibility) 383 + pub fn create_relationship_sync( 384 + &self, 385 + start: NodeId, 386 + end: NodeId, 387 + rel_type: u32, 388 + ) -> Result<RelationshipId> { 389 + // For backward compatibility, use blocking async 390 + tokio::task::block_in_place(|| { 391 + tokio::runtime::Handle::current().block_on(async { 392 + self.create_relationship(start, end, rel_type).await 393 + }) 394 + }) 395 + } 396 + 397 + /// Legacy sync method for get_node (for compatibility) 398 + pub fn get_node_sync(&self, id: NodeId) -> Option<Node> { 399 + // For backward compatibility, use blocking async 400 + tokio::task::block_in_place(|| { 401 + tokio::runtime::Handle::current().block_on(async { 402 + self.get_node(id).await 403 + }) 404 + }) 405 + } 406 + 407 + /// Legacy sync method for get_relationship (for compatibility) 408 + pub fn get_relationship_sync(&self, id: RelationshipId) -> Option<Relationship> { 409 + // For backward compatibility, use blocking async 410 + tokio::task::block_in_place(|| { 411 + tokio::runtime::Handle::current().block_on(async { 412 + self.get_relationship(id).await 413 + }) 414 + }) 415 + } 416 + } 417 + 418 + #[cfg(test)] 419 + mod tests { 420 + use super::*; 421 + #[cfg(feature = "rocksdb-storage")] 422 + use crate::persistence::rocksdb_store::RocksDBStore; 423 + use tempfile::tempdir; 424 + 425 + #[tokio::test] 426 + #[cfg(feature = "rocksdb-storage")] 427 + async fn test_persistent_graph() -> Result<()> { 428 + let temp_dir = tempdir().unwrap(); 429 + let storage_backend = Arc::new(crate::persistence::rocksdb_store::RocksDBStore::new(temp_dir.path())?); 430 + let graph = PersistentGraph::new(storage_backend).await?; 431 + 432 + // Create 
nodes 433 + let node1 = graph.create_node().await?; 434 + let node2 = graph.create_node().await?; 435 + 436 + // Update a node with properties 437 + graph.update_node(node1, |node| { 438 + let schema = graph.schema(); 439 + let mut schema_guard = schema.write(); 440 + let name_prop = schema_guard.get_or_create_property_key("name"); 441 + node.properties.insert(name_prop, crate::core::PropertyValue::String("Alice".to_string())); 442 + }).await?; 443 + 444 + // Create relationship 445 + let schema = graph.schema(); 446 + let mut schema_guard = schema.write(); 447 + let knows_rel = schema_guard.get_or_create_relationship_type("KNOWS"); 448 + drop(schema_guard); 449 + 450 + let rel_id = graph.create_relationship(node1, node2, knows_rel).await?; 451 + 452 + // Verify nodes and relationships exist 453 + assert!(graph.get_node(node1).await.is_some()); 454 + assert!(graph.get_node(node2).await.is_some()); 455 + assert!(graph.get_relationship(rel_id).await.is_some()); 456 + 457 + // Test relationship queries 458 + let relationships = graph.get_node_relationships(node1, Direction::Outgoing, None); 459 + assert_eq!(relationships.len(), 1); 460 + assert_eq!(relationships[0].id, rel_id); 461 + 462 + // Close and reopen to test persistence 463 + graph.close().await?; 464 + 465 + let storage_backend2 = Arc::new(RocksDBStore::new(temp_dir.path())?); 466 + let graph2 = PersistentGraph::new(storage_backend2).await?; 467 + 468 + // Verify data persisted 469 + assert!(graph2.get_node(node1).await.is_some()); 470 + assert!(graph2.get_node(node2).await.is_some()); 471 + assert!(graph2.get_relationship(rel_id).await.is_some()); 472 + 473 + let relationships2 = graph2.get_node_relationships(node1, Direction::Outgoing, None); 474 + assert_eq!(relationships2.len(), 1); 475 + assert_eq!(relationships2[0].id, rel_id); 476 + 477 + Ok(()) 478 + } 479 + }
+53
src/core/property.rs
··· 15 15 16 16 impl Eq for PropertyValue {} 17 17 18 + impl PartialOrd for PropertyValue { 19 + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { 20 + Some(self.cmp(other)) 21 + } 22 + } 23 + 24 + impl Ord for PropertyValue { 25 + fn cmp(&self, other: &Self) -> std::cmp::Ordering { 26 + use std::cmp::Ordering; 27 + use PropertyValue::*; 28 + 29 + // First compare by type for consistent ordering 30 + let self_type_order = self.type_order(); 31 + let other_type_order = other.type_order(); 32 + 33 + match self_type_order.cmp(&other_type_order) { 34 + Ordering::Equal => { 35 + // Same type, compare values 36 + match (self, other) { 37 + (Null, Null) => Ordering::Equal, 38 + (Boolean(a), Boolean(b)) => a.cmp(b), 39 + (Integer(a), Integer(b)) => a.cmp(b), 40 + (Float(a), Float(b)) => a.partial_cmp(b).unwrap_or(Ordering::Equal), 41 + (String(a), String(b)) => a.cmp(b), 42 + (List(a), List(b)) => a.cmp(b), 43 + (Map(a), Map(b)) => { 44 + // Convert to sorted vectors for comparison 45 + let mut a_sorted: Vec<_> = a.iter().collect(); 46 + let mut b_sorted: Vec<_> = b.iter().collect(); 47 + a_sorted.sort_by_key(|(k, _)| *k); 48 + b_sorted.sort_by_key(|(k, _)| *k); 49 + a_sorted.cmp(&b_sorted) 50 + }, 51 + _ => unreachable!("Types should be equal due to type order check"), 52 + } 53 + }, 54 + other_ordering => other_ordering, 55 + } 56 + } 57 + } 58 + 18 59 impl std::hash::Hash for PropertyValue { 19 60 fn hash<H: std::hash::Hasher>(&self, state: &mut H) { 20 61 match self { ··· 60 101 PropertyValue::String(_) => "string", 61 102 PropertyValue::List(_) => "list", 62 103 PropertyValue::Map(_) => "map", 104 + } 105 + } 106 + 107 + fn type_order(&self) -> u8 { 108 + match self { 109 + PropertyValue::Null => 0, 110 + PropertyValue::Boolean(_) => 1, 111 + PropertyValue::Integer(_) => 2, 112 + PropertyValue::Float(_) => 3, 113 + PropertyValue::String(_) => 4, 114 + PropertyValue::List(_) => 5, 115 + PropertyValue::Map(_) => 6, 63 116 } 64 117 } 65 118 }
+411 -26
src/cypher/executor.rs
··· 1 1 use crate::cypher::planner::{QueryPlan, ScanPlan, CreatePlan, ProjectPlan}; 2 2 use crate::cypher::ast::{CypherQuery, MatchClause, CreateClause, ReturnClause, Expression, PatternElement}; 3 3 use crate::core::{Graph, PropertyValue}; 4 + use crate::index::{IndexQuery, IndexType}; 4 5 use crate::{Result, GigabrainError, NodeId, RelationshipId}; 5 6 use std::sync::Arc; 6 7 use std::collections::HashMap; 8 + use tracing::{debug, info}; 7 9 8 10 pub struct QueryExecutor { 9 11 graph: Arc<Graph>, ··· 99 101 } 100 102 } 101 103 104 + // Apply WHERE clause filtering if present 105 + if let Some(where_expr) = &match_clause.where_clause { 106 + self.apply_where_filter(context, where_expr).await?; 107 + } 108 + 102 109 Ok(QueryResult::empty()) 103 110 } 104 111 112 + async fn apply_where_filter(&self, context: &mut ExecutionContext, where_expr: &Expression) -> Result<()> { 113 + // Create a new context to store filtered results 114 + let mut filtered_context = ExecutionContext::new(); 115 + 116 + // For each variable binding, evaluate the WHERE clause 117 + let variable_names: Vec<String> = context.variables.keys().cloned().collect(); 118 + 119 + if variable_names.is_empty() { 120 + return Ok(()); 121 + } 122 + 123 + // Get all possible combinations of variable bindings 124 + let combinations = self.generate_binding_combinations(context).await?; 125 + 126 + // Filter combinations based on WHERE clause 127 + let mut filtered_combinations = Vec::new(); 128 + for combination in combinations { 129 + // Create a temporary context with this specific binding combination 130 + let mut temp_context = ExecutionContext::new(); 131 + for (var_name, binding) in &combination { 132 + temp_context.bind_variable(var_name.clone(), binding.clone()); 133 + } 134 + 135 + // Evaluate WHERE clause with this binding 136 + let result = self.evaluate_expression(where_expr, &temp_context, None).await?; 137 + 138 + if self.is_truthy(&result) { 139 + filtered_combinations.push(combination); 140 + 
} 141 + } 142 + 143 + // Rebuild the context with filtered results 144 + self.rebuild_context_from_combinations(context, filtered_combinations); 145 + 146 + Ok(()) 147 + } 148 + 149 + async fn generate_binding_combinations(&self, context: &ExecutionContext) -> Result<Vec<HashMap<String, VariableBinding>>> { 150 + let mut combinations = Vec::new(); 151 + let variable_names: Vec<String> = context.variables.keys().cloned().collect(); 152 + 153 + if variable_names.is_empty() { 154 + return Ok(combinations); 155 + } 156 + 157 + // Generate all combinations of variable bindings 158 + // This is a simplified approach - for performance, we'd use join algorithms 159 + if variable_names.len() == 1 { 160 + let var_name = &variable_names[0]; 161 + if let Some(binding) = context.variables.get(var_name) { 162 + match binding { 163 + VariableBinding::Nodes(node_ids) => { 164 + for &node_id in node_ids { 165 + let mut combination = HashMap::new(); 166 + combination.insert(var_name.clone(), VariableBinding::Nodes(vec![node_id])); 167 + combinations.push(combination); 168 + } 169 + }, 170 + VariableBinding::Relationships(rel_ids) => { 171 + for &rel_id in rel_ids { 172 + let mut combination = HashMap::new(); 173 + combination.insert(var_name.clone(), VariableBinding::Relationships(vec![rel_id])); 174 + combinations.push(combination); 175 + } 176 + } 177 + } 178 + } 179 + } else { 180 + // For multiple variables, generate cartesian product 181 + // This is simplified - real implementation would be more efficient 182 + self.generate_cartesian_product(context, &variable_names, 0, &mut HashMap::new(), &mut combinations); 183 + } 184 + 185 + Ok(combinations) 186 + } 187 + 188 + fn generate_cartesian_product( 189 + &self, 190 + context: &ExecutionContext, 191 + variable_names: &[String], 192 + index: usize, 193 + current_combination: &mut HashMap<String, VariableBinding>, 194 + combinations: &mut Vec<HashMap<String, VariableBinding>> 195 + ) { 196 + if index >= variable_names.len() { 197 
+ combinations.push(current_combination.clone()); 198 + return; 199 + } 200 + 201 + let var_name = &variable_names[index]; 202 + if let Some(binding) = context.variables.get(var_name) { 203 + match binding { 204 + VariableBinding::Nodes(node_ids) => { 205 + for &node_id in node_ids { 206 + current_combination.insert(var_name.clone(), VariableBinding::Nodes(vec![node_id])); 207 + self.generate_cartesian_product(context, variable_names, index + 1, current_combination, combinations); 208 + } 209 + }, 210 + VariableBinding::Relationships(rel_ids) => { 211 + for &rel_id in rel_ids { 212 + current_combination.insert(var_name.clone(), VariableBinding::Relationships(vec![rel_id])); 213 + self.generate_cartesian_product(context, variable_names, index + 1, current_combination, combinations); 214 + } 215 + } 216 + } 217 + } 218 + } 219 + 220 + fn rebuild_context_from_combinations(&self, context: &mut ExecutionContext, combinations: Vec<HashMap<String, VariableBinding>>) { 221 + // Clear current context 222 + context.variables.clear(); 223 + 224 + // Group combinations back into variable bindings 225 + let mut var_to_nodes: HashMap<String, Vec<NodeId>> = HashMap::new(); 226 + let mut var_to_rels: HashMap<String, Vec<RelationshipId>> = HashMap::new(); 227 + 228 + for combination in combinations { 229 + for (var_name, binding) in combination { 230 + match binding { 231 + VariableBinding::Nodes(node_ids) => { 232 + var_to_nodes.entry(var_name).or_insert_with(Vec::new).extend(node_ids); 233 + }, 234 + VariableBinding::Relationships(rel_ids) => { 235 + var_to_rels.entry(var_name).or_insert_with(Vec::new).extend(rel_ids); 236 + } 237 + } 238 + } 239 + } 240 + 241 + // Rebuild context with filtered bindings 242 + for (var_name, node_ids) in var_to_nodes { 243 + context.bind_variable(var_name, VariableBinding::Nodes(node_ids)); 244 + } 245 + 246 + for (var_name, rel_ids) in var_to_rels { 247 + context.bind_variable(var_name, VariableBinding::Relationships(rel_ids)); 248 + } 249 + } 
250 + 105 251 async fn find_matching_nodes(&self, node_pattern: &crate::cypher::ast::NodePattern) -> Result<Vec<NodeId>> { 106 - // For now, return all nodes - in reality this would filter by labels and properties 107 - let all_nodes = self.graph.get_all_nodes(); 252 + debug!("Finding nodes matching pattern: labels={:?}, properties={:?}", 253 + node_pattern.labels, node_pattern.properties); 254 + 255 + let mut candidate_nodes: Option<Vec<NodeId>> = None; 256 + 257 + // Use label indexes if labels are specified 258 + if !node_pattern.labels.is_empty() { 259 + debug!("Using label indexes for labels: {:?}", node_pattern.labels); 260 + let mut label_matches = Vec::new(); 261 + 262 + for label_name in &node_pattern.labels { 263 + match self.graph.find_nodes_by_label(label_name) { 264 + Ok(nodes) => { 265 + if label_matches.is_empty() { 266 + label_matches = nodes; 267 + } else { 268 + // Intersect with previous results (AND logic for multiple labels) 269 + label_matches.retain(|node_id| nodes.contains(node_id)); 270 + } 271 + }, 272 + Err(e) => { 273 + debug!("Failed to use label index for '{}': {}", label_name, e); 274 + // Fall back to scanning all nodes 275 + label_matches = self.graph.get_all_nodes(); 276 + break; 277 + } 278 + } 279 + } 280 + 281 + candidate_nodes = Some(label_matches); 282 + info!("Label index returned {} candidate nodes", candidate_nodes.as_ref().unwrap().len()); 283 + } 284 + 285 + // Use property indexes if properties are specified 286 + if let Some(ref properties) = node_pattern.properties { 287 + debug!("Using property indexes for {} properties", properties.len()); 288 + let mut property_matches: Option<Vec<NodeId>> = None; 289 + 290 + for (prop_name, value_expr) in properties { 291 + // For now, only handle literal values in property patterns 292 + if let Expression::Literal(prop_value) = value_expr { 293 + match self.graph.find_nodes_by_property(prop_name, prop_value) { 294 + Ok(nodes) => { 295 + if let Some(ref mut existing_matches) = 
property_matches { 296 + // Intersect with previous property matches 297 + existing_matches.retain(|node_id| nodes.contains(node_id)); 298 + } else { 299 + property_matches = Some(nodes); 300 + } 301 + }, 302 + Err(e) => { 303 + debug!("Failed to use property index for '{}': {}", prop_name, e); 304 + // Continue with other properties or fall back to scanning 305 + } 306 + } 307 + } 308 + } 309 + 310 + if let Some(prop_nodes) = property_matches { 311 + if let Some(ref mut candidates) = candidate_nodes { 312 + // Intersect label matches with property matches 313 + candidates.retain(|node_id| prop_nodes.contains(node_id)); 314 + } else { 315 + candidate_nodes = Some(prop_nodes); 316 + } 317 + info!("Property index intersection returned {} candidate nodes", 318 + candidate_nodes.as_ref().unwrap().len()); 319 + } 320 + } 321 + 322 + // If no indexes were used, scan all nodes 323 + let result_nodes = candidate_nodes.unwrap_or_else(|| { 324 + debug!("No indexes available, scanning all nodes"); 325 + self.graph.get_all_nodes() 326 + }); 108 327 109 - // TODO: Filter by labels if specified 110 - // TODO: Filter by properties if specified 328 + // TODO: Apply additional filtering for complex property expressions 329 + // that couldn't be handled by indexes 111 330 112 - Ok(all_nodes) 331 + debug!("Final result: {} nodes matched the pattern", result_nodes.len()); 332 + Ok(result_nodes) 113 333 } 114 334 115 335 async fn get_all_relationships(&self) -> Result<Vec<RelationshipId>> { ··· 183 403 184 404 created_nodes.push(node_id); 185 405 406 + // Add to indexes after creating the node 407 + if let Some(node) = self.graph.get_node(node_id) { 408 + let labels: Vec<_> = node.labels.iter().cloned().collect(); 409 + let properties = node.properties.clone(); 410 + 411 + // Ensure indexes exist for any new labels 412 + for &label_id in &labels { 413 + let index_type = IndexType::Label(label_id); 414 + if let Err(e) = self.graph.index_manager().create_index(index_type, None, false) { 
415 + debug!("Label index may already exist or failed to create: {}", e); 416 + } 417 + } 418 + 419 + // Ensure indexes exist for any new properties 420 + for &property_key_id in properties.keys() { 421 + let index_type = IndexType::Property(property_key_id); 422 + if let Err(e) = self.graph.index_manager().create_index(index_type, None, false) { 423 + debug!("Property index may already exist or failed to create: {}", e); 424 + } 425 + } 426 + 427 + // Add node to indexes 428 + if let Err(e) = self.graph.index_manager().add_node(node_id, &labels, &properties) { 429 + debug!("Failed to add created node to indexes: {}", e); 430 + } else { 431 + debug!("Added newly created node {} to indexes", node_id.0); 432 + } 433 + } 434 + 186 435 // Store node variable mapping for relationships 187 436 if let Some(var_name) = &node_pattern.variable { 188 437 node_variables.insert(var_name.clone(), node_id); ··· 300 549 Ok(QueryResult { rows, columns }) 301 550 } 302 551 303 - async fn evaluate_expression(&self, expr: &Expression, context: &ExecutionContext, current_node: Option<NodeId>) -> Result<Value> { 304 - match expr { 305 - Expression::Variable(var_name) => { 306 - if let Some(binding) = context.variables.get(var_name) { 307 - match binding { 308 - VariableBinding::Nodes(nodes) => { 309 - if let Some(node_id) = current_node { 310 - Ok(Value::Node(node_id)) 311 - } else if let Some(&first_node) = nodes.first() { 312 - Ok(Value::Node(first_node)) 313 - } else { 314 - Ok(Value::Null) 552 + fn evaluate_expression<'a>(&'a self, expr: &'a Expression, context: &'a ExecutionContext, current_node: Option<NodeId>) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<Value>> + 'a>> { 553 + Box::pin(async move { 554 + match expr { 555 + Expression::Variable(var_name) => { 556 + if let Some(binding) = context.variables.get(var_name) { 557 + match binding { 558 + VariableBinding::Nodes(nodes) => { 559 + if let Some(node_id) = current_node { 560 + Ok(Value::Node(node_id)) 561 + } 
else if let Some(&first_node) = nodes.first() { 562 + Ok(Value::Node(first_node)) 563 + } else { 564 + Ok(Value::Null) 565 + } 566 + }, 567 + VariableBinding::Relationships(rels) => { 568 + if let Some(&first_rel) = rels.first() { 569 + Ok(Value::Relationship(first_rel)) 570 + } else { 571 + Ok(Value::Null) 572 + } 315 573 } 316 - }, 317 - VariableBinding::Relationships(rels) => { 318 - if let Some(&first_rel) = rels.first() { 319 - Ok(Value::Relationship(first_rel)) 320 - } else { 321 - Ok(Value::Null) 574 + } 575 + } else { 576 + Ok(Value::Null) 577 + } 578 + }, 579 + Expression::Property(prop_expr) => { 580 + let base_value = self.evaluate_expression(&prop_expr.expression, context, current_node).await?; 581 + self.get_property_value(base_value, &prop_expr.property).await 582 + }, 583 + Expression::Literal(literal) => { 584 + Ok(self.property_value_to_executor_value(literal)) 585 + }, 586 + Expression::Equal(left, right) => { 587 + let left_val = self.evaluate_expression(left, context, current_node).await?; 588 + let right_val = self.evaluate_expression(right, context, current_node).await?; 589 + Ok(Value::Boolean(self.values_equal(&left_val, &right_val))) 590 + }, 591 + Expression::NotEqual(left, right) => { 592 + let left_val = self.evaluate_expression(left, context, current_node).await?; 593 + let right_val = self.evaluate_expression(right, context, current_node).await?; 594 + Ok(Value::Boolean(!self.values_equal(&left_val, &right_val))) 595 + }, 596 + Expression::And(left, right) => { 597 + let left_val = self.evaluate_expression(left, context, current_node).await?; 598 + let right_val = self.evaluate_expression(right, context, current_node).await?; 599 + Ok(Value::Boolean(self.is_truthy(&left_val) && self.is_truthy(&right_val))) 600 + }, 601 + Expression::Or(left, right) => { 602 + let left_val = self.evaluate_expression(left, context, current_node).await?; 603 + let right_val = self.evaluate_expression(right, context, current_node).await?; 604 + 
Ok(Value::Boolean(self.is_truthy(&left_val) || self.is_truthy(&right_val))) 605 + }, 606 + Expression::Not(expr) => { 607 + let val = self.evaluate_expression(expr, context, current_node).await?; 608 + Ok(Value::Boolean(!self.is_truthy(&val))) 609 + }, 610 + Expression::LessThan(left, right) => { 611 + let left_val = self.evaluate_expression(left, context, current_node).await?; 612 + let right_val = self.evaluate_expression(right, context, current_node).await?; 613 + Ok(Value::Boolean(self.compare_values(&left_val, &right_val) < 0)) 614 + }, 615 + Expression::LessThanOrEqual(left, right) => { 616 + let left_val = self.evaluate_expression(left, context, current_node).await?; 617 + let right_val = self.evaluate_expression(right, context, current_node).await?; 618 + Ok(Value::Boolean(self.compare_values(&left_val, &right_val) <= 0)) 619 + }, 620 + Expression::GreaterThan(left, right) => { 621 + let left_val = self.evaluate_expression(left, context, current_node).await?; 622 + let right_val = self.evaluate_expression(right, context, current_node).await?; 623 + Ok(Value::Boolean(self.compare_values(&left_val, &right_val) > 0)) 624 + }, 625 + Expression::GreaterThanOrEqual(left, right) => { 626 + let left_val = self.evaluate_expression(left, context, current_node).await?; 627 + let right_val = self.evaluate_expression(right, context, current_node).await?; 628 + Ok(Value::Boolean(self.compare_values(&left_val, &right_val) >= 0)) 629 + }, 630 + _ => Ok(Value::Null), // TODO: Implement remaining expression types 631 + } 632 + }) 633 + } 634 + 635 + async fn get_property_value(&self, base_value: Value, property_name: &str) -> Result<Value> { 636 + match base_value { 637 + Value::Node(node_id) => { 638 + if let Some(node) = self.graph.get_node(node_id) { 639 + // Get property value from node 640 + let schema = self.graph.schema(); 641 + let schema_guard = schema.read(); 642 + for (prop_key, _) in &schema_guard.property_keys { 643 + if prop_key == property_name { 644 + let 
prop_key_id = schema_guard.property_keys[prop_key]; 645 + if let Some(prop_value) = node.properties.get(&prop_key_id) { 646 + return Ok(self.property_value_to_executor_value(prop_value)); 322 647 } 323 648 } 324 649 } 325 - } else { 326 - Ok(Value::Null) 327 650 } 651 + Ok(Value::Null) 328 652 }, 329 - _ => Ok(Value::Null), // TODO: Implement other expression types 653 + Value::Relationship(_rel_id) => { 654 + // TODO: Implement relationship property access 655 + Ok(Value::Null) 656 + }, 657 + _ => Ok(Value::Null), 658 + } 659 + } 660 + 661 + fn property_value_to_executor_value(&self, prop_val: &PropertyValue) -> Value { 662 + match prop_val { 663 + PropertyValue::String(s) => Value::String(s.clone()), 664 + PropertyValue::Integer(i) => Value::Integer(*i), 665 + PropertyValue::Float(f) => Value::Float(*f), 666 + PropertyValue::Boolean(b) => Value::Boolean(*b), 667 + PropertyValue::List(list) => { 668 + let converted_list: Vec<Value> = list.iter() 669 + .map(|item| self.property_value_to_executor_value(item)) 670 + .collect(); 671 + Value::List(converted_list) 672 + }, 673 + PropertyValue::Null => Value::Null, 674 + PropertyValue::Map(_) => Value::Null, // TODO: Implement map support 675 + } 676 + } 677 + 678 + fn values_equal(&self, left: &Value, right: &Value) -> bool { 679 + match (left, right) { 680 + (Value::String(a), Value::String(b)) => a == b, 681 + (Value::Integer(a), Value::Integer(b)) => a == b, 682 + (Value::Float(a), Value::Float(b)) => (a - b).abs() < f64::EPSILON, 683 + (Value::Boolean(a), Value::Boolean(b)) => a == b, 684 + (Value::Node(a), Value::Node(b)) => a == b, 685 + (Value::Relationship(a), Value::Relationship(b)) => a == b, 686 + (Value::Null, Value::Null) => true, 687 + // Type coercion for numbers 688 + (Value::Integer(a), Value::Float(b)) => (*a as f64 - b).abs() < f64::EPSILON, 689 + (Value::Float(a), Value::Integer(b)) => (a - *b as f64).abs() < f64::EPSILON, 690 + _ => false, 691 + } 692 + } 693 + 694 + fn is_truthy(&self, value: 
&Value) -> bool { 695 + match value { 696 + Value::Boolean(b) => *b, 697 + Value::Null => false, 698 + Value::Integer(i) => *i != 0, 699 + Value::Float(f) => *f != 0.0, 700 + Value::String(s) => !s.is_empty(), 701 + Value::List(list) => !list.is_empty(), 702 + Value::Node(_) => true, 703 + Value::Relationship(_) => true, 704 + } 705 + } 706 + 707 + fn compare_values(&self, left: &Value, right: &Value) -> i32 { 708 + match (left, right) { 709 + (Value::Integer(a), Value::Integer(b)) => a.cmp(b) as i32, 710 + (Value::Float(a), Value::Float(b)) => a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal) as i32, 711 + (Value::Integer(a), Value::Float(b)) => (*a as f64).partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal) as i32, 712 + (Value::Float(a), Value::Integer(b)) => a.partial_cmp(&(*b as f64)).unwrap_or(std::cmp::Ordering::Equal) as i32, 713 + (Value::String(a), Value::String(b)) => a.cmp(b) as i32, 714 + _ => 0, // Incomparable types are considered equal 330 715 } 331 716 } 332 717
+6
src/error.rs
··· 42 42 ValidationError(String), 43 43 } 44 44 45 + impl From<crate::index::types::IndexError> for GigabrainError { 46 + fn from(err: crate::index::types::IndexError) -> Self { 47 + GigabrainError::Index(err.to_string()) 48 + } 49 + } 50 + 45 51 pub type Result<T> = std::result::Result<T, GigabrainError>;
+510
src/index/composite_index.rs
··· 1 + use std::collections::{BTreeMap, HashMap}; 2 + use std::sync::Arc; 3 + use roaring::RoaringBitmap; 4 + use dashmap::DashMap; 5 + use parking_lot::RwLock; 6 + use crate::{NodeId, PropertyKeyId, Result}; 7 + use crate::core::PropertyValue; 8 + use crate::index::types::{IndexQuery, IndexQueryResult, IndexQueryStats, IndexError, IndexStats}; 9 + use chrono; 10 + use tracing::{debug, warn, instrument}; 11 + use std::time::Instant; 12 + 13 + /// Composite key for multi-property indexing 14 + #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] 15 + pub struct CompositeKey(Vec<PropertyValue>); 16 + 17 + impl CompositeKey { 18 + pub fn new(values: Vec<PropertyValue>) -> Self { 19 + Self(values) 20 + } 21 + 22 + pub fn values(&self) -> &[PropertyValue] { 23 + &self.0 24 + } 25 + 26 + /// Check if this composite key matches a partial key (prefix match) 27 + pub fn matches_prefix(&self, partial: &[PropertyValue]) -> bool { 28 + if partial.len() > self.0.len() { 29 + return false; 30 + } 31 + 32 + for (i, value) in partial.iter().enumerate() { 33 + if self.0[i] != *value { 34 + return false; 35 + } 36 + } 37 + 38 + true 39 + } 40 + 41 + /// Check if this composite key is within a range for specific properties 42 + pub fn matches_range(&self, property_ranges: &[(usize, Option<PropertyValue>, Option<PropertyValue>, bool, bool)]) -> bool { 43 + for &(prop_index, ref min, ref max, inclusive_min, inclusive_max) in property_ranges { 44 + if prop_index >= self.0.len() { 45 + return false; 46 + } 47 + 48 + let value = &self.0[prop_index]; 49 + 50 + // Check minimum bound 51 + if let Some(min_val) = min { 52 + if inclusive_min { 53 + if value < min_val { 54 + return false; 55 + } 56 + } else { 57 + if value <= min_val { 58 + return false; 59 + } 60 + } 61 + } 62 + 63 + // Check maximum bound 64 + if let Some(max_val) = max { 65 + if inclusive_max { 66 + if value > max_val { 67 + return false; 68 + } 69 + } else { 70 + if value >= max_val { 71 + return false; 72 + } 73 
+ } 74 + } 75 + } 76 + 77 + true 78 + } 79 + } 80 + 81 + /// Composite index for efficient multi-property queries 82 + pub struct CompositeIndex { 83 + /// The property keys that make up this composite index 84 + property_keys: Vec<PropertyKeyId>, 85 + 86 + /// Hash index for exact composite key lookups - O(1) average case 87 + exact_index: Arc<DashMap<CompositeKey, RoaringBitmap>>, 88 + 89 + /// Range index for ordered composite keys - O(log n) range queries 90 + range_index: Arc<RwLock<BTreeMap<CompositeKey, RoaringBitmap>>>, 91 + 92 + /// Statistics tracking 93 + stats: Arc<RwLock<IndexStats>>, 94 + 95 + /// Configuration 96 + unique: bool, 97 + } 98 + 99 + impl CompositeIndex { 100 + pub fn new(property_keys: Vec<PropertyKeyId>, unique: bool) -> Self { 101 + Self { 102 + property_keys, 103 + exact_index: Arc::new(DashMap::new()), 104 + range_index: Arc::new(RwLock::new(BTreeMap::new())), 105 + stats: Arc::new(RwLock::new(IndexStats::default())), 106 + unique, 107 + } 108 + } 109 + 110 + /// Get the property keys for this composite index 111 + pub fn property_keys(&self) -> &[PropertyKeyId] { 112 + &self.property_keys 113 + } 114 + 115 + /// Add a node to the composite index 116 + #[instrument(skip(self), level = "debug")] 117 + pub fn add_node(&self, node_id: NodeId, properties: &HashMap<PropertyKeyId, PropertyValue>) -> Result<()> { 118 + let start = Instant::now(); 119 + 120 + // Extract the composite key values 121 + let mut key_values = Vec::new(); 122 + for &property_key in &self.property_keys { 123 + if let Some(value) = properties.get(&property_key) { 124 + key_values.push(value.clone()); 125 + } else { 126 + // Missing property value - cannot add to composite index 127 + debug!("Node {} missing property {} for composite index", node_id.0, property_key.0); 128 + return Ok(()); 129 + } 130 + } 131 + 132 + let composite_key = CompositeKey::new(key_values); 133 + 134 + // Check unique constraint if enabled 135 + if self.unique && 
self.has_key(&composite_key)? { 136 + return Err(IndexError::UniqueConstraintViolation( 137 + format!("Composite key {:?}", composite_key.values()) 138 + ).into()); 139 + } 140 + 141 + // Add to exact index 142 + self.exact_index 143 + .entry(composite_key.clone()) 144 + .or_insert_with(RoaringBitmap::new) 145 + .insert(node_id.0 as u32); 146 + 147 + // Add to range index 148 + let mut range_index = self.range_index.write(); 149 + range_index 150 + .entry(composite_key.clone()) 151 + .or_insert_with(RoaringBitmap::new) 152 + .insert(node_id.0 as u32); 153 + 154 + // Update statistics 155 + self.update_stats_add(); 156 + 157 + debug!( 158 + property_keys = ?self.property_keys, 159 + node_id = %node_id.0, 160 + composite_key = ?composite_key.values(), 161 + duration_micros = start.elapsed().as_micros(), 162 + "Added node to composite index" 163 + ); 164 + 165 + Ok(()) 166 + } 167 + 168 + /// Remove a node from the composite index 169 + #[instrument(skip(self), level = "debug")] 170 + pub fn remove_node(&self, node_id: NodeId, properties: &HashMap<PropertyKeyId, PropertyValue>) -> Result<()> { 171 + // Extract the composite key values 172 + let mut key_values = Vec::new(); 173 + for &property_key in &self.property_keys { 174 + if let Some(value) = properties.get(&property_key) { 175 + key_values.push(value.clone()); 176 + } else { 177 + // Missing property value - nothing to remove 178 + return Ok(()); 179 + } 180 + } 181 + 182 + let composite_key = CompositeKey::new(key_values); 183 + 184 + // Remove from exact index 185 + if let Some(mut bitmap) = self.exact_index.get_mut(&composite_key) { 186 + bitmap.remove(node_id.0 as u32); 187 + if bitmap.is_empty() { 188 + drop(bitmap); 189 + self.exact_index.remove(&composite_key); 190 + } 191 + } 192 + 193 + // Remove from range index 194 + let mut range_index = self.range_index.write(); 195 + if let Some(bitmap) = range_index.get_mut(&composite_key) { 196 + bitmap.remove(node_id.0 as u32); 197 + if bitmap.is_empty() { 198 + 
range_index.remove(&composite_key); 199 + } 200 + } 201 + 202 + // Update statistics 203 + self.update_stats_remove(); 204 + 205 + debug!( 206 + property_keys = ?self.property_keys, 207 + node_id = %node_id.0, 208 + composite_key = ?composite_key.values(), 209 + "Removed node from composite index" 210 + ); 211 + 212 + Ok(()) 213 + } 214 + 215 + /// Query the composite index 216 + #[instrument(skip(self), level = "debug")] 217 + pub fn query(&self, query: &CompositeIndexQuery) -> Result<IndexQueryResult> { 218 + let start = Instant::now(); 219 + let mut stats = IndexQueryStats::default(); 220 + stats.index_used = true; 221 + 222 + let nodes = match query { 223 + CompositeIndexQuery::Exact(values) => { 224 + debug!("Executing exact composite query for values: {:?}", values); 225 + self.query_exact(values)? 226 + }, 227 + CompositeIndexQuery::Prefix(partial_values) => { 228 + debug!("Executing prefix composite query for values: {:?}", partial_values); 229 + self.query_prefix(partial_values)? 230 + }, 231 + CompositeIndexQuery::Range(property_ranges) => { 232 + debug!("Executing range composite query"); 233 + self.query_range(property_ranges)? 234 + }, 235 + CompositeIndexQuery::In(value_sets) => { 236 + debug!("Executing IN composite query for {} sets", value_sets.len()); 237 + self.query_in(value_sets)? 
238 + }, 239 + }; 240 + 241 + stats.execution_time_micros = start.elapsed().as_micros() as u64; 242 + stats.nodes_returned = nodes.len() as u64; 243 + 244 + // Update query statistics 245 + self.update_query_stats(); 246 + 247 + debug!( 248 + property_keys = ?self.property_keys, 249 + nodes_returned = nodes.len(), 250 + duration_micros = stats.execution_time_micros, 251 + "Composite index query completed" 252 + ); 253 + 254 + Ok(IndexQueryResult { nodes, stats }) 255 + } 256 + 257 + /// Check if a composite key exists in the index 258 + pub fn has_key(&self, key: &CompositeKey) -> Result<bool> { 259 + Ok(self.exact_index.get(key).map_or(false, |bitmap| !bitmap.is_empty())) 260 + } 261 + 262 + /// Get current index statistics 263 + pub fn get_stats(&self) -> IndexStats { 264 + self.stats.read().clone() 265 + } 266 + 267 + /// Rebuild the entire composite index 268 + pub fn rebuild(&self) -> Result<()> { 269 + debug!(property_keys = ?self.property_keys, "Rebuilding composite index"); 270 + 271 + // Clear all indexes 272 + self.exact_index.clear(); 273 + self.range_index.write().clear(); 274 + 275 + // Reset statistics 276 + let mut stats = self.stats.write(); 277 + stats.last_rebuild = Some(chrono::Utc::now()); 278 + stats.total_nodes = 0; 279 + stats.unique_values = 0; 280 + 281 + Ok(()) 282 + } 283 + 284 + // Private helper methods 285 + 286 + fn query_exact(&self, values: &[PropertyValue]) -> Result<Vec<NodeId>> { 287 + if values.len() != self.property_keys.len() { 288 + return Err(IndexError::InvalidQuery( 289 + format!("Composite key length mismatch: expected {}, got {}", 290 + self.property_keys.len(), values.len()) 291 + ).into()); 292 + } 293 + 294 + let composite_key = CompositeKey::new(values.to_vec()); 295 + Ok(self.exact_index 296 + .get(&composite_key) 297 + .map(|bitmap| bitmap.iter().map(|id| NodeId(id as u64)).collect()) 298 + .unwrap_or_default()) 299 + } 300 + 301 + fn query_prefix(&self, partial_values: &[PropertyValue]) -> Result<Vec<NodeId>> { 
302 + if partial_values.len() > self.property_keys.len() { 303 + return Err(IndexError::InvalidQuery( 304 + format!("Prefix length exceeds composite key length: {} > {}", 305 + partial_values.len(), self.property_keys.len()) 306 + ).into()); 307 + } 308 + 309 + let mut result = RoaringBitmap::new(); 310 + 311 + // Search through all keys for prefix matches 312 + for entry in self.exact_index.iter() { 313 + if entry.key().matches_prefix(partial_values) { 314 + result |= &*entry.value(); 315 + } 316 + } 317 + 318 + Ok(result.iter().map(|id| NodeId(id as u64)).collect()) 319 + } 320 + 321 + fn query_range(&self, property_ranges: &[(usize, Option<PropertyValue>, Option<PropertyValue>, bool, bool)]) -> Result<Vec<NodeId>> { 322 + let range_index = self.range_index.read(); 323 + let mut result = RoaringBitmap::new(); 324 + 325 + // Search through all keys for range matches 326 + for (key, bitmap) in range_index.iter() { 327 + if key.matches_range(property_ranges) { 328 + result |= bitmap; 329 + } 330 + } 331 + 332 + Ok(result.iter().map(|id| NodeId(id as u64)).collect()) 333 + } 334 + 335 + fn query_in(&self, value_sets: &[Vec<PropertyValue>]) -> Result<Vec<NodeId>> { 336 + let mut result = RoaringBitmap::new(); 337 + 338 + for values in value_sets { 339 + if values.len() == self.property_keys.len() { 340 + let composite_key = CompositeKey::new(values.clone()); 341 + if let Some(bitmap) = self.exact_index.get(&composite_key) { 342 + result |= &*bitmap; 343 + } 344 + } 345 + } 346 + 347 + Ok(result.iter().map(|id| NodeId(id as u64)).collect()) 348 + } 349 + 350 + fn update_stats_add(&self) { 351 + let mut stats = self.stats.write(); 352 + stats.total_nodes += 1; 353 + stats.last_updated = chrono::Utc::now(); 354 + } 355 + 356 + fn update_stats_remove(&self) { 357 + let mut stats = self.stats.write(); 358 + if stats.total_nodes > 0 { 359 + stats.total_nodes -= 1; 360 + } 361 + stats.last_updated = chrono::Utc::now(); 362 + } 363 + 364 + fn update_query_stats(&self) { 365 + 
let mut stats = self.stats.write(); 366 + stats.query_count += 1; 367 + stats.hit_rate = stats.query_count as f64 / (stats.query_count + 1) as f64; 368 + } 369 + } 370 + 371 + /// Query types specific to composite indexes 372 + #[derive(Debug)] 373 + pub enum CompositeIndexQuery { 374 + /// Exact match on all properties 375 + Exact(Vec<PropertyValue>), 376 + /// Prefix match on the first N properties 377 + Prefix(Vec<PropertyValue>), 378 + /// Range query on specific properties (property_index, min, max, inclusive_min, inclusive_max) 379 + Range(Vec<(usize, Option<PropertyValue>, Option<PropertyValue>, bool, bool)>), 380 + /// IN query with multiple complete composite keys 381 + In(Vec<Vec<PropertyValue>>), 382 + } 383 + 384 + #[cfg(test)] 385 + mod tests { 386 + use super::*; 387 + 388 + #[test] 389 + fn test_composite_key_prefix_match() { 390 + let key = CompositeKey::new(vec![ 391 + PropertyValue::String("Alice".to_string()), 392 + PropertyValue::Integer(30), 393 + PropertyValue::String("Engineer".to_string()), 394 + ]); 395 + 396 + // Should match prefix 397 + assert!(key.matches_prefix(&[PropertyValue::String("Alice".to_string())])); 398 + assert!(key.matches_prefix(&[ 399 + PropertyValue::String("Alice".to_string()), 400 + PropertyValue::Integer(30), 401 + ])); 402 + 403 + // Should not match non-prefix 404 + assert!(!key.matches_prefix(&[PropertyValue::String("Bob".to_string())])); 405 + assert!(!key.matches_prefix(&[ 406 + PropertyValue::String("Alice".to_string()), 407 + PropertyValue::Integer(25), 408 + ])); 409 + 410 + // Should not match longer than key 411 + assert!(!key.matches_prefix(&[ 412 + PropertyValue::String("Alice".to_string()), 413 + PropertyValue::Integer(30), 414 + PropertyValue::String("Engineer".to_string()), 415 + PropertyValue::Boolean(true), 416 + ])); 417 + } 418 + 419 + #[test] 420 + fn test_composite_index_exact_query() { 421 + let property_keys = vec![PropertyKeyId(1), PropertyKeyId(2)]; 422 + let index = 
CompositeIndex::new(property_keys, false); 423 + let node1 = NodeId(1); 424 + let node2 = NodeId(2); 425 + 426 + let props1 = HashMap::from([ 427 + (PropertyKeyId(1), PropertyValue::String("Alice".to_string())), 428 + (PropertyKeyId(2), PropertyValue::Integer(30)), 429 + ]); 430 + let props2 = HashMap::from([ 431 + (PropertyKeyId(1), PropertyValue::String("Bob".to_string())), 432 + (PropertyKeyId(2), PropertyValue::Integer(25)), 433 + ]); 434 + 435 + // Add nodes 436 + index.add_node(node1, &props1).unwrap(); 437 + index.add_node(node2, &props2).unwrap(); 438 + 439 + // Query for exact match 440 + let query = CompositeIndexQuery::Exact(vec![ 441 + PropertyValue::String("Alice".to_string()), 442 + PropertyValue::Integer(30), 443 + ]); 444 + let result = index.query(&query).unwrap(); 445 + 446 + assert_eq!(result.nodes.len(), 1); 447 + assert!(result.nodes.contains(&node1)); 448 + assert!(!result.nodes.contains(&node2)); 449 + } 450 + 451 + #[test] 452 + fn test_composite_index_prefix_query() { 453 + let property_keys = vec![PropertyKeyId(1), PropertyKeyId(2), PropertyKeyId(3)]; 454 + let index = CompositeIndex::new(property_keys, false); 455 + let node1 = NodeId(1); 456 + let node2 = NodeId(2); 457 + let node3 = NodeId(3); 458 + 459 + let props1 = HashMap::from([ 460 + (PropertyKeyId(1), PropertyValue::String("Alice".to_string())), 461 + (PropertyKeyId(2), PropertyValue::Integer(30)), 462 + (PropertyKeyId(3), PropertyValue::String("Engineer".to_string())), 463 + ]); 464 + let props2 = HashMap::from([ 465 + (PropertyKeyId(1), PropertyValue::String("Alice".to_string())), 466 + (PropertyKeyId(2), PropertyValue::Integer(25)), 467 + (PropertyKeyId(3), PropertyValue::String("Designer".to_string())), 468 + ]); 469 + let props3 = HashMap::from([ 470 + (PropertyKeyId(1), PropertyValue::String("Bob".to_string())), 471 + (PropertyKeyId(2), PropertyValue::Integer(30)), 472 + (PropertyKeyId(3), PropertyValue::String("Manager".to_string())), 473 + ]); 474 + 475 + // Add nodes 476 
+ index.add_node(node1, &props1).unwrap(); 477 + index.add_node(node2, &props2).unwrap(); 478 + index.add_node(node3, &props3).unwrap(); 479 + 480 + // Query for prefix match on first property 481 + let query = CompositeIndexQuery::Prefix(vec![ 482 + PropertyValue::String("Alice".to_string()), 483 + ]); 484 + let result = index.query(&query).unwrap(); 485 + 486 + assert_eq!(result.nodes.len(), 2); 487 + assert!(result.nodes.contains(&node1)); 488 + assert!(result.nodes.contains(&node2)); 489 + assert!(!result.nodes.contains(&node3)); 490 + } 491 + 492 + #[test] 493 + fn test_composite_index_unique_constraint() { 494 + let property_keys = vec![PropertyKeyId(1), PropertyKeyId(2)]; 495 + let index = CompositeIndex::new(property_keys, true); 496 + let node1 = NodeId(1); 497 + let node2 = NodeId(2); 498 + 499 + let props = HashMap::from([ 500 + (PropertyKeyId(1), PropertyValue::String("Alice".to_string())), 501 + (PropertyKeyId(2), PropertyValue::Integer(30)), 502 + ]); 503 + 504 + // First insertion should succeed 505 + assert!(index.add_node(node1, &props).is_ok()); 506 + 507 + // Second insertion with same composite key should fail 508 + assert!(index.add_node(node2, &props).is_err()); 509 + } 510 + }
+482
src/index/label_index.rs
··· 1 + use std::sync::Arc; 2 + use roaring::RoaringBitmap; 3 + use dashmap::DashMap; 4 + use parking_lot::RwLock; 5 + use crate::{NodeId, LabelId, Result}; 6 + use crate::index::types::{IndexQuery, IndexQueryResult, IndexQueryStats, IndexError, IndexStats}; 7 + use chrono; 8 + use tracing::{debug, warn, instrument}; 9 + use std::time::Instant; 10 + 11 + /// Efficient label index for fast node lookup by labels 12 + pub struct LabelIndex { 13 + /// Maps label ID to nodes that have that label 14 + label_to_nodes: Arc<DashMap<LabelId, RoaringBitmap>>, 15 + 16 + /// Maps node ID to labels it has (reverse index for removal) 17 + node_to_labels: Arc<DashMap<NodeId, RoaringBitmap>>, 18 + 19 + /// Statistics tracking 20 + stats: Arc<RwLock<IndexStats>>, 21 + } 22 + 23 + impl LabelIndex { 24 + pub fn new() -> Self { 25 + Self { 26 + label_to_nodes: Arc::new(DashMap::new()), 27 + node_to_labels: Arc::new(DashMap::new()), 28 + stats: Arc::new(RwLock::new(IndexStats::default())), 29 + } 30 + } 31 + 32 + /// Add a label to a node 33 + #[instrument(skip(self), level = "debug")] 34 + pub fn add_node_label(&self, node_id: NodeId, label_id: LabelId) -> Result<()> { 35 + let start = Instant::now(); 36 + 37 + // Add to label -> nodes mapping 38 + self.label_to_nodes 39 + .entry(label_id) 40 + .or_insert_with(RoaringBitmap::new) 41 + .insert(node_id.0 as u32); 42 + 43 + // Add to node -> labels mapping (for efficient removal) 44 + self.node_to_labels 45 + .entry(node_id) 46 + .or_insert_with(RoaringBitmap::new) 47 + .insert(label_id.0); 48 + 49 + // Update statistics 50 + self.update_stats_add(); 51 + 52 + debug!( 53 + node_id = %node_id.0, 54 + label_id = %label_id.0, 55 + duration_micros = start.elapsed().as_micros(), 56 + "Added label to node" 57 + ); 58 + 59 + Ok(()) 60 + } 61 + 62 + /// Remove a label from a node 63 + #[instrument(skip(self), level = "debug")] 64 + pub fn remove_node_label(&self, node_id: NodeId, label_id: LabelId) -> Result<()> { 65 + // Remove from label -> 
nodes mapping 66 + if let Some(mut bitmap) = self.label_to_nodes.get_mut(&label_id) { 67 + bitmap.remove(node_id.0 as u32); 68 + if bitmap.is_empty() { 69 + drop(bitmap); 70 + self.label_to_nodes.remove(&label_id); 71 + } 72 + } 73 + 74 + // Remove from node -> labels mapping 75 + if let Some(mut bitmap) = self.node_to_labels.get_mut(&node_id) { 76 + bitmap.remove(label_id.0); 77 + if bitmap.is_empty() { 78 + drop(bitmap); 79 + self.node_to_labels.remove(&node_id); 80 + } 81 + } 82 + 83 + // Update statistics 84 + self.update_stats_remove(); 85 + 86 + debug!( 87 + node_id = %node_id.0, 88 + label_id = %label_id.0, 89 + "Removed label from node" 90 + ); 91 + 92 + Ok(()) 93 + } 94 + 95 + /// Remove all labels from a node (e.g., when node is deleted) 96 + #[instrument(skip(self), level = "debug")] 97 + pub fn remove_node(&self, node_id: NodeId) -> Result<()> { 98 + if let Some((_, labels_bitmap)) = self.node_to_labels.remove(&node_id) { 99 + // Remove this node from all label indexes 100 + for label_id_u32 in labels_bitmap.iter() { 101 + let label_id = LabelId(label_id_u32); 102 + if let Some(mut nodes_bitmap) = self.label_to_nodes.get_mut(&label_id) { 103 + nodes_bitmap.remove(node_id.0 as u32); 104 + if nodes_bitmap.is_empty() { 105 + drop(nodes_bitmap); 106 + self.label_to_nodes.remove(&label_id); 107 + } 108 + } 109 + } 110 + } 111 + 112 + debug!(node_id = %node_id.0, "Removed all labels for node"); 113 + 114 + Ok(()) 115 + } 116 + 117 + /// Query the index 118 + #[instrument(skip(self), level = "debug")] 119 + pub fn query(&self, query: &IndexQuery) -> Result<IndexQueryResult> { 120 + let start = Instant::now(); 121 + let mut stats = IndexQueryStats::default(); 122 + stats.index_used = true; 123 + 124 + let nodes = match query { 125 + IndexQuery::Exact(value) => { 126 + // For label index, treat exact query as label ID lookup 127 + if let Some(label_id) = self.extract_label_id_from_value(value) { 128 + debug!("Executing exact label query for label: {}", 
label_id.0); 129 + self.get_nodes_by_label(label_id)? 130 + } else { 131 + return Err(IndexError::InvalidQuery( 132 + "Label index requires LabelId for exact queries".to_string() 133 + ).into()); 134 + } 135 + }, 136 + IndexQuery::In(values) => { 137 + debug!("Executing IN query for {} labels", values.len()); 138 + self.query_multiple_labels(values)? 139 + }, 140 + IndexQuery::Exists => { 141 + debug!("Executing EXISTS query"); 142 + self.query_all_labeled_nodes()? 143 + }, 144 + _ => { 145 + return Err(IndexError::InvalidQuery( 146 + format!("Query type not supported by label index: {}", query) 147 + ).into()); 148 + } 149 + }; 150 + 151 + stats.execution_time_micros = start.elapsed().as_micros() as u64; 152 + stats.nodes_returned = nodes.len() as u64; 153 + 154 + // Update query statistics 155 + self.update_query_stats(); 156 + 157 + debug!( 158 + query = %query, 159 + nodes_returned = nodes.len(), 160 + duration_micros = stats.execution_time_micros, 161 + "Label index query completed" 162 + ); 163 + 164 + Ok(IndexQueryResult { nodes, stats }) 165 + } 166 + 167 + /// Get all nodes that have a specific label 168 + pub fn get_nodes_by_label(&self, label_id: LabelId) -> Result<Vec<NodeId>> { 169 + Ok(self.label_to_nodes 170 + .get(&label_id) 171 + .map(|bitmap| bitmap.iter().map(|id| NodeId(id as u64)).collect()) 172 + .unwrap_or_default()) 173 + } 174 + 175 + /// Get all labels for a specific node 176 + pub fn get_node_labels(&self, node_id: NodeId) -> Result<Vec<LabelId>> { 177 + Ok(self.node_to_labels 178 + .get(&node_id) 179 + .map(|bitmap| bitmap.iter().map(|id| LabelId(id)).collect()) 180 + .unwrap_or_default()) 181 + } 182 + 183 + /// Check if a node has a specific label 184 + pub fn node_has_label(&self, node_id: NodeId, label_id: LabelId) -> Result<bool> { 185 + Ok(self.node_to_labels 186 + .get(&node_id) 187 + .map_or(false, |bitmap| bitmap.contains(label_id.0))) 188 + } 189 + 190 + /// Get all nodes that have any labels 191 + pub fn 
get_all_labeled_nodes(&self) -> Result<Vec<NodeId>> { 192 + let mut result = RoaringBitmap::new(); 193 + 194 + for entry in self.label_to_nodes.iter() { 195 + result |= &*entry.value(); 196 + } 197 + 198 + Ok(result.iter().map(|id| NodeId(id as u64)).collect()) 199 + } 200 + 201 + /// Get current index statistics 202 + pub fn get_stats(&self) -> IndexStats { 203 + let mut stats = self.stats.read().clone(); 204 + 205 + // Update dynamic statistics 206 + stats.unique_values = self.label_to_nodes.len() as u64; 207 + stats.total_nodes = self.node_to_labels.len() as u64; 208 + 209 + stats 210 + } 211 + 212 + /// Rebuild the entire index (useful for optimization) 213 + pub fn rebuild(&self) -> Result<()> { 214 + debug!("Rebuilding label index"); 215 + 216 + // Clear all indexes 217 + self.label_to_nodes.clear(); 218 + self.node_to_labels.clear(); 219 + 220 + // Reset statistics 221 + let mut stats = self.stats.write(); 222 + stats.last_rebuild = Some(chrono::Utc::now()); 223 + stats.total_nodes = 0; 224 + stats.unique_values = 0; 225 + 226 + Ok(()) 227 + } 228 + 229 + // Private helper methods 230 + 231 + fn query_multiple_labels(&self, values: &[crate::core::PropertyValue]) -> Result<Vec<NodeId>> { 232 + let mut result = RoaringBitmap::new(); 233 + 234 + for value in values { 235 + if let Some(label_id) = self.extract_label_id_from_value(value) { 236 + if let Some(bitmap) = self.label_to_nodes.get(&label_id) { 237 + result |= &*bitmap; 238 + } 239 + } 240 + } 241 + 242 + Ok(result.iter().map(|id| NodeId(id as u64)).collect()) 243 + } 244 + 245 + fn query_all_labeled_nodes(&self) -> Result<Vec<NodeId>> { 246 + self.get_all_labeled_nodes() 247 + } 248 + 249 + fn extract_label_id_from_value(&self, value: &crate::core::PropertyValue) -> Option<LabelId> { 250 + match value { 251 + crate::core::PropertyValue::Integer(id) => Some(LabelId(*id as u32)), 252 + _ => None, 253 + } 254 + } 255 + 256 + fn update_stats_add(&self) { 257 + let mut stats = self.stats.write(); 258 + 
stats.total_nodes += 1; 259 + stats.last_updated = chrono::Utc::now(); 260 + } 261 + 262 + fn update_stats_remove(&self) { 263 + let mut stats = self.stats.write(); 264 + if stats.total_nodes > 0 { 265 + stats.total_nodes -= 1; 266 + } 267 + stats.last_updated = chrono::Utc::now(); 268 + } 269 + 270 + fn update_query_stats(&self) { 271 + let mut stats = self.stats.write(); 272 + stats.query_count += 1; 273 + // Simple hit rate calculation 274 + stats.hit_rate = stats.query_count as f64 / (stats.query_count + 1) as f64; 275 + } 276 + } 277 + 278 + impl Default for LabelIndex { 279 + fn default() -> Self { 280 + Self::new() 281 + } 282 + } 283 + 284 + #[cfg(test)] 285 + mod tests { 286 + use super::*; 287 + use crate::core::PropertyValue; 288 + 289 + #[test] 290 + fn test_add_and_get_node_label() { 291 + let index = LabelIndex::new(); 292 + let node_id = NodeId(1); 293 + let label_id = LabelId(100); 294 + 295 + // Add label to node 296 + index.add_node_label(node_id, label_id).unwrap(); 297 + 298 + // Check that node has the label 299 + assert!(index.node_has_label(node_id, label_id).unwrap()); 300 + 301 + // Get nodes by label 302 + let nodes = index.get_nodes_by_label(label_id).unwrap(); 303 + assert_eq!(nodes.len(), 1); 304 + assert!(nodes.contains(&node_id)); 305 + 306 + // Get labels for node 307 + let labels = index.get_node_labels(node_id).unwrap(); 308 + assert_eq!(labels.len(), 1); 309 + assert!(labels.contains(&label_id)); 310 + } 311 + 312 + #[test] 313 + fn test_multiple_nodes_same_label() { 314 + let index = LabelIndex::new(); 315 + let node1 = NodeId(1); 316 + let node2 = NodeId(2); 317 + let node3 = NodeId(3); 318 + let person_label = LabelId(100); 319 + 320 + // Add same label to multiple nodes 321 + index.add_node_label(node1, person_label).unwrap(); 322 + index.add_node_label(node2, person_label).unwrap(); 323 + index.add_node_label(node3, person_label).unwrap(); 324 + 325 + // Get all nodes with Person label 326 + let nodes = 
index.get_nodes_by_label(person_label).unwrap(); 327 + assert_eq!(nodes.len(), 3); 328 + assert!(nodes.contains(&node1)); 329 + assert!(nodes.contains(&node2)); 330 + assert!(nodes.contains(&node3)); 331 + } 332 + 333 + #[test] 334 + fn test_multiple_labels_same_node() { 335 + let index = LabelIndex::new(); 336 + let node_id = NodeId(1); 337 + let person_label = LabelId(100); 338 + let employee_label = LabelId(200); 339 + 340 + // Add multiple labels to same node 341 + index.add_node_label(node_id, person_label).unwrap(); 342 + index.add_node_label(node_id, employee_label).unwrap(); 343 + 344 + // Check that node has both labels 345 + assert!(index.node_has_label(node_id, person_label).unwrap()); 346 + assert!(index.node_has_label(node_id, employee_label).unwrap()); 347 + 348 + // Get labels for node 349 + let labels = index.get_node_labels(node_id).unwrap(); 350 + assert_eq!(labels.len(), 2); 351 + assert!(labels.contains(&person_label)); 352 + assert!(labels.contains(&employee_label)); 353 + 354 + // Each label should return the node 355 + let person_nodes = index.get_nodes_by_label(person_label).unwrap(); 356 + assert!(person_nodes.contains(&node_id)); 357 + 358 + let employee_nodes = index.get_nodes_by_label(employee_label).unwrap(); 359 + assert!(employee_nodes.contains(&node_id)); 360 + } 361 + 362 + #[test] 363 + fn test_remove_node_label() { 364 + let index = LabelIndex::new(); 365 + let node_id = NodeId(1); 366 + let label1 = LabelId(100); 367 + let label2 = LabelId(200); 368 + 369 + // Add two labels 370 + index.add_node_label(node_id, label1).unwrap(); 371 + index.add_node_label(node_id, label2).unwrap(); 372 + 373 + // Remove one label 374 + index.remove_node_label(node_id, label1).unwrap(); 375 + 376 + // Should not have first label 377 + assert!(!index.node_has_label(node_id, label1).unwrap()); 378 + // Should still have second label 379 + assert!(index.node_has_label(node_id, label2).unwrap()); 380 + 381 + // First label should not return the node 
382 + let label1_nodes = index.get_nodes_by_label(label1).unwrap(); 383 + assert!(!label1_nodes.contains(&node_id)); 384 + 385 + // Second label should still return the node 386 + let label2_nodes = index.get_nodes_by_label(label2).unwrap(); 387 + assert!(label2_nodes.contains(&node_id)); 388 + } 389 + 390 + #[test] 391 + fn test_remove_entire_node() { 392 + let index = LabelIndex::new(); 393 + let node_id = NodeId(1); 394 + let label1 = LabelId(100); 395 + let label2 = LabelId(200); 396 + 397 + // Add multiple labels to node 398 + index.add_node_label(node_id, label1).unwrap(); 399 + index.add_node_label(node_id, label2).unwrap(); 400 + 401 + // Remove entire node 402 + index.remove_node(node_id).unwrap(); 403 + 404 + // Node should not have any labels 405 + assert!(!index.node_has_label(node_id, label1).unwrap()); 406 + assert!(!index.node_has_label(node_id, label2).unwrap()); 407 + 408 + // Labels should not return the node 409 + let label1_nodes = index.get_nodes_by_label(label1).unwrap(); 410 + assert!(!label1_nodes.contains(&node_id)); 411 + 412 + let label2_nodes = index.get_nodes_by_label(label2).unwrap(); 413 + assert!(!label2_nodes.contains(&node_id)); 414 + } 415 + 416 + #[test] 417 + fn test_query_exact() { 418 + let index = LabelIndex::new(); 419 + let node1 = NodeId(1); 420 + let node2 = NodeId(2); 421 + let label_id = LabelId(100); 422 + 423 + index.add_node_label(node1, label_id).unwrap(); 424 + index.add_node_label(node2, label_id).unwrap(); 425 + 426 + // Query for label 427 + let query = IndexQuery::Exact(PropertyValue::Integer(100)); 428 + let result = index.query(&query).unwrap(); 429 + 430 + assert_eq!(result.nodes.len(), 2); 431 + assert!(result.nodes.contains(&node1)); 432 + assert!(result.nodes.contains(&node2)); 433 + assert!(result.stats.index_used); 434 + } 435 + 436 + #[test] 437 + fn test_query_in() { 438 + let index = LabelIndex::new(); 439 + let node1 = NodeId(1); 440 + let node2 = NodeId(2); 441 + let node3 = NodeId(3); 442 + let 
label1 = LabelId(100); 443 + let label2 = LabelId(200); 444 + let label3 = LabelId(300); 445 + 446 + index.add_node_label(node1, label1).unwrap(); 447 + index.add_node_label(node2, label2).unwrap(); 448 + index.add_node_label(node3, label3).unwrap(); 449 + 450 + // Query for multiple labels 451 + let query = IndexQuery::In(vec![ 452 + PropertyValue::Integer(100), 453 + PropertyValue::Integer(300), 454 + ]); 455 + let result = index.query(&query).unwrap(); 456 + 457 + assert_eq!(result.nodes.len(), 2); 458 + assert!(result.nodes.contains(&node1)); 459 + assert!(result.nodes.contains(&node3)); 460 + assert!(!result.nodes.contains(&node2)); 461 + } 462 + 463 + #[test] 464 + fn test_query_exists() { 465 + let index = LabelIndex::new(); 466 + let node1 = NodeId(1); 467 + let node2 = NodeId(2); 468 + let label1 = LabelId(100); 469 + let label2 = LabelId(200); 470 + 471 + index.add_node_label(node1, label1).unwrap(); 472 + index.add_node_label(node2, label2).unwrap(); 473 + 474 + // Query for all labeled nodes 475 + let query = IndexQuery::Exists; 476 + let result = index.query(&query).unwrap(); 477 + 478 + assert_eq!(result.nodes.len(), 2); 479 + assert!(result.nodes.contains(&node1)); 480 + assert!(result.nodes.contains(&node2)); 481 + } 482 + }
+305 -39
src/index/mod.rs
··· 1 - use roaring::RoaringBitmap; 2 - use dashmap::DashMap; 1 + pub mod types; 2 + pub mod property_index; 3 + pub mod label_index; 4 + pub mod composite_index; 5 + pub mod persistent; 6 + 7 + // Re-export commonly used types 8 + pub use types::{IndexType, IndexQuery, IndexQueryResult, IndexConfig, IndexStats, IndexError}; 9 + pub use composite_index::{CompositeIndex, CompositeIndexQuery}; 10 + pub use persistent::PersistentIndexManager; 11 + 3 12 use std::sync::Arc; 13 + use std::collections::HashMap; 4 14 use crate::{NodeId, LabelId, PropertyKeyId, Result}; 5 15 use crate::core::PropertyValue; 16 + // No additional private imports needed - using re-exported types 17 + use self::property_index::PropertyValueIndex; 18 + use self::label_index::LabelIndex; 19 + use tracing::{debug, warn}; 6 20 21 + /// Unified index manager that handles all types of indexes 7 22 pub struct IndexManager { 8 - label_index: Arc<DashMap<LabelId, RoaringBitmap>>, 9 - property_index: Arc<DashMap<(PropertyKeyId, PropertyValue), RoaringBitmap>>, 23 + /// Property-based indexes 24 + property_indexes: Arc<dashmap::DashMap<PropertyKeyId, PropertyValueIndex>>, 25 + 26 + /// Label-based indexes 27 + label_indexes: Arc<dashmap::DashMap<LabelId, LabelIndex>>, 28 + 29 + /// Composite indexes (using property key combination as key) 30 + composite_indexes: Arc<dashmap::DashMap<Vec<PropertyKeyId>, CompositeIndex>>, 31 + 32 + /// Global label index for all labels 33 + global_label_index: Arc<LabelIndex>, 34 + 35 + /// Index configurations and metadata 36 + index_configs: Arc<dashmap::DashMap<String, IndexConfig>>, 10 37 } 11 38 12 39 impl IndexManager { 13 40 pub fn new() -> Self { 14 41 Self { 15 - label_index: Arc::new(DashMap::new()), 16 - property_index: Arc::new(DashMap::new()), 42 + property_indexes: Arc::new(dashmap::DashMap::new()), 43 + label_indexes: Arc::new(dashmap::DashMap::new()), 44 + composite_indexes: Arc::new(dashmap::DashMap::new()), 45 + global_label_index: 
Arc::new(LabelIndex::new()), 46 + index_configs: Arc::new(dashmap::DashMap::new()), 17 47 } 18 48 } 19 49 20 - pub fn add_node_label(&self, node_id: NodeId, label_id: LabelId) { 21 - self.label_index 22 - .entry(label_id) 23 - .or_insert_with(RoaringBitmap::new) 24 - .insert(node_id.0 as u32); 50 + /// Create a new index 51 + pub fn create_index(&self, index_type: IndexType, name: Option<String>, unique: bool) -> Result<String> { 52 + let index_name = name.unwrap_or_else(|| self.generate_index_name(&index_type)); 53 + 54 + // Check if index already exists 55 + if self.index_configs.contains_key(&index_name) { 56 + return Err(IndexError::AlreadyExists(index_name).into()); 57 + } 58 + 59 + match &index_type { 60 + IndexType::Property(property_key_id) => { 61 + let property_index = PropertyValueIndex::new(*property_key_id, unique); 62 + self.property_indexes.insert(*property_key_id, property_index); 63 + }, 64 + IndexType::Label(label_id) => { 65 + let label_index = LabelIndex::new(); 66 + self.label_indexes.insert(*label_id, label_index); 67 + }, 68 + IndexType::Composite(property_key_ids) => { 69 + let composite_index = CompositeIndex::new(property_key_ids.clone(), unique); 70 + self.composite_indexes.insert(property_key_ids.clone(), composite_index); 71 + }, 72 + _ => { 73 + return Err(IndexError::UnsupportedType(format!("{}", index_type)).into()); 74 + } 75 + } 76 + 77 + // Store index configuration 78 + let config = IndexConfig::new(index_type.clone(), Some(index_name.clone()), unique); 79 + self.index_configs.insert(index_name.clone(), config); 80 + 81 + debug!(index_name = %index_name, index_type = %index_type, "Created new index"); 82 + 83 + Ok(index_name) 25 84 } 26 85 27 - pub fn remove_node_label(&self, node_id: NodeId, label_id: LabelId) { 28 - if let Some(mut bitmap) = self.label_index.get_mut(&label_id) { 29 - bitmap.remove(node_id.0 as u32); 86 + /// Drop an existing index 87 + pub fn drop_index(&self, name: &str) -> Result<()> { 88 + let config = 
self.index_configs.remove(name) 89 + .ok_or_else(|| IndexError::NotFound(name.to_string()))?; 90 + 91 + match config.1.index_type { 92 + IndexType::Property(property_key_id) => { 93 + self.property_indexes.remove(&property_key_id); 94 + }, 95 + IndexType::Label(label_id) => { 96 + self.label_indexes.remove(&label_id); 97 + }, 98 + IndexType::Composite(property_key_ids) => { 99 + self.composite_indexes.remove(&property_key_ids); 100 + }, 101 + _ => {} 30 102 } 103 + 104 + debug!(index_name = %name, "Dropped index"); 105 + 106 + Ok(()) 31 107 } 32 108 33 - pub fn get_nodes_by_label(&self, label_id: LabelId) -> Vec<NodeId> { 34 - self.label_index 35 - .get(&label_id) 36 - .map(|bitmap| { 37 - bitmap.iter() 38 - .map(|id| NodeId(id as u64)) 39 - .collect() 40 - }) 41 - .unwrap_or_default() 109 + /// Add a node with its labels to all relevant indexes 110 + pub fn add_node(&self, node_id: NodeId, labels: &[LabelId], properties: &HashMap<PropertyKeyId, PropertyValue>) -> Result<()> { 111 + // Add to global label index 112 + for &label_id in labels { 113 + self.global_label_index.add_node_label(node_id, label_id)?; 114 + 115 + // Add to specific label index if it exists 116 + if let Some(label_index) = self.label_indexes.get(&label_id) { 117 + label_index.add_node_label(node_id, label_id)?; 118 + } 119 + } 120 + 121 + // Add to property indexes 122 + for (&property_key_id, property_value) in properties { 123 + if let Some(property_index) = self.property_indexes.get(&property_key_id) { 124 + property_index.add_node(node_id, property_value)?; 125 + } 126 + } 127 + 128 + // Add to composite indexes 129 + for composite_index in self.composite_indexes.iter() { 130 + composite_index.value().add_node(node_id, properties)?; 131 + } 132 + 133 + debug!(node_id = %node_id.0, labels = labels.len(), properties = properties.len(), "Added node to indexes"); 134 + 135 + Ok(()) 42 136 } 43 137 44 - pub fn add_node_property(&self, node_id: NodeId, property_key: PropertyKeyId, value: 
PropertyValue) { 45 - let key = (property_key, value); 46 - self.property_index 47 - .entry(key) 48 - .or_insert_with(RoaringBitmap::new) 49 - .insert(node_id.0 as u32); 138 + /// Remove a node from all indexes 139 + pub fn remove_node(&self, node_id: NodeId) -> Result<()> { 140 + // Remove from global label index 141 + self.global_label_index.remove_node(node_id)?; 142 + 143 + // Remove from all specific label indexes 144 + for label_index in self.label_indexes.iter() { 145 + label_index.value().remove_node(node_id)?; 146 + } 147 + 148 + // Remove from property indexes - we need the current properties to do this properly 149 + // For now, this is a limitation - we'd need to track node properties separately 150 + // or modify the property index to support removal by node_id only 151 + 152 + debug!(node_id = %node_id.0, "Removed node from indexes"); 153 + 154 + Ok(()) 50 155 } 51 156 52 - pub fn get_nodes_by_property(&self, property_key: PropertyKeyId, value: &PropertyValue) -> Vec<NodeId> { 53 - let key = (property_key, value.clone()); 54 - self.property_index 55 - .get(&key) 56 - .map(|bitmap| { 57 - bitmap.iter() 58 - .map(|id| NodeId(id as u64)) 59 - .collect() 60 - }) 61 - .unwrap_or_default() 157 + /// Update a node's properties in indexes 158 + pub fn update_node_properties(&self, node_id: NodeId, old_properties: &HashMap<PropertyKeyId, PropertyValue>, new_properties: &HashMap<PropertyKeyId, PropertyValue>) -> Result<()> { 159 + // Remove old property values 160 + for (&property_key_id, old_value) in old_properties { 161 + if let Some(property_index) = self.property_indexes.get(&property_key_id) { 162 + property_index.remove_node(node_id, old_value)?; 163 + } 164 + } 165 + 166 + // Remove from composite indexes (old values) 167 + for composite_index in self.composite_indexes.iter() { 168 + composite_index.value().remove_node(node_id, old_properties)?; 169 + } 170 + 171 + // Add new property values 172 + for (&property_key_id, new_value) in new_properties { 
173 + if let Some(property_index) = self.property_indexes.get(&property_key_id) { 174 + property_index.add_node(node_id, new_value)?; 175 + } 176 + } 177 + 178 + // Add to composite indexes (new values) 179 + for composite_index in self.composite_indexes.iter() { 180 + composite_index.value().add_node(node_id, new_properties)?; 181 + } 182 + 183 + debug!(node_id = %node_id.0, "Updated node properties in indexes"); 184 + 185 + Ok(()) 186 + } 187 + 188 + /// Query an index 189 + pub fn query(&self, index_type: &IndexType, query: &IndexQuery) -> Result<IndexQueryResult> { 190 + match index_type { 191 + IndexType::Property(property_key_id) => { 192 + if let Some(property_index) = self.property_indexes.get(property_key_id) { 193 + property_index.query(query) 194 + } else { 195 + Err(IndexError::NotFound(format!("Property index for key {}", property_key_id.0)).into()) 196 + } 197 + }, 198 + IndexType::Label(label_id) => { 199 + if let Some(label_index) = self.label_indexes.get(label_id) { 200 + label_index.query(query) 201 + } else { 202 + // Use global label index if specific label index doesn't exist 203 + self.global_label_index.query(query) 204 + } 205 + }, 206 + IndexType::Composite(property_key_ids) => { 207 + if let Some(composite_index) = self.composite_indexes.get(property_key_ids) { 208 + // Convert IndexQuery to CompositeIndexQuery 209 + match query { 210 + IndexQuery::Composite(values) => { 211 + let composite_query = composite_index::CompositeIndexQuery::Exact(values.clone()); 212 + composite_index.query(&composite_query) 213 + }, 214 + _ => { 215 + Err(IndexError::InvalidQuery( 216 + format!("Query type {:?} not supported for composite indexes", query) 217 + ).into()) 218 + } 219 + } 220 + } else { 221 + Err(IndexError::NotFound(format!("Composite index for keys {:?}", property_key_ids)).into()) 222 + } 223 + }, 224 + _ => { 225 + Err(IndexError::UnsupportedType(format!("{}", index_type)).into()) 226 + } 227 + } 228 + } 229 + 230 + /// Get nodes by label 
(convenience method) 231 + pub fn get_nodes_by_label(&self, label_id: LabelId) -> Result<Vec<NodeId>> { 232 + self.global_label_index.get_nodes_by_label(label_id) 233 + } 234 + 235 + /// Get nodes by property (convenience method) 236 + pub fn get_nodes_by_property(&self, property_key_id: PropertyKeyId, value: &PropertyValue) -> Result<Vec<NodeId>> { 237 + if let Some(property_index) = self.property_indexes.get(&property_key_id) { 238 + let query = IndexQuery::Exact(value.clone()); 239 + let result = property_index.query(&query)?; 240 + Ok(result.nodes) 241 + } else { 242 + Ok(Vec::new()) 243 + } 244 + } 245 + 246 + /// Get nodes by composite key (convenience method) 247 + pub fn get_nodes_by_composite_key(&self, property_key_ids: &[PropertyKeyId], values: &[PropertyValue]) -> Result<Vec<NodeId>> { 248 + if let Some(composite_index) = self.composite_indexes.get(property_key_ids) { 249 + let query = composite_index::CompositeIndexQuery::Exact(values.to_vec()); 250 + let result = composite_index.query(&query)?; 251 + Ok(result.nodes) 252 + } else { 253 + Ok(Vec::new()) 254 + } 255 + } 256 + 257 + /// Query composite index with prefix (convenience method) 258 + pub fn query_composite_prefix(&self, property_key_ids: &[PropertyKeyId], partial_values: &[PropertyValue]) -> Result<Vec<NodeId>> { 259 + if let Some(composite_index) = self.composite_indexes.get(property_key_ids) { 260 + let query = composite_index::CompositeIndexQuery::Prefix(partial_values.to_vec()); 261 + let result = composite_index.query(&query)?; 262 + Ok(result.nodes) 263 + } else { 264 + Ok(Vec::new()) 265 + } 266 + } 267 + 268 + /// List all available indexes 269 + pub fn list_indexes(&self) -> Vec<IndexConfig> { 270 + self.index_configs.iter() 271 + .map(|entry| entry.value().clone()) 272 + .collect() 273 + } 274 + 275 + /// Get statistics for all indexes 276 + pub fn get_index_stats(&self) -> HashMap<String, types::IndexStats> { 277 + let mut stats = HashMap::new(); 278 + 279 + // Add property index 
stats 280 + for entry in self.property_indexes.iter() { 281 + let key = format!("property_{}", entry.key().0); 282 + stats.insert(key, entry.value().get_stats()); 283 + } 284 + 285 + // Add label index stats 286 + stats.insert("global_labels".to_string(), self.global_label_index.get_stats()); 287 + 288 + for entry in self.label_indexes.iter() { 289 + let key = format!("label_{}", entry.key().0); 290 + stats.insert(key, entry.value().get_stats()); 291 + } 292 + 293 + // Add composite index stats 294 + for entry in self.composite_indexes.iter() { 295 + let key = format!("composite_{}", 296 + entry.key().iter() 297 + .map(|id| id.0.to_string()) 298 + .collect::<Vec<_>>() 299 + .join("_")); 300 + stats.insert(key, entry.value().get_stats()); 301 + } 302 + 303 + stats 304 + } 305 + 306 + // Private helper methods 307 + 308 + fn generate_index_name(&self, index_type: &IndexType) -> String { 309 + match index_type { 310 + IndexType::Property(prop_id) => format!("property_index_{}", prop_id.0), 311 + IndexType::Label(label_id) => format!("label_index_{}", label_id.0), 312 + IndexType::Composite(prop_ids) => { 313 + format!("composite_index_{}", 314 + prop_ids.iter() 315 + .map(|id| id.0.to_string()) 316 + .collect::<Vec<_>>() 317 + .join("_")) 318 + }, 319 + IndexType::FullText(prop_id) => format!("fulltext_index_{}", prop_id.0), 320 + IndexType::Range(prop_id) => format!("range_index_{}", prop_id.0), 321 + } 322 + } 323 + } 324 + 325 + impl Default for IndexManager { 326 + fn default() -> Self { 327 + Self::new() 62 328 } 63 329 }
+350
src/index/persistent.rs
··· 1 + use std::sync::Arc; 2 + use std::collections::HashMap; 3 + use crate::{NodeId, LabelId, PropertyKeyId, Result}; 4 + use crate::core::PropertyValue; 5 + use crate::storage::StorageEngine; 6 + use crate::index::types::{IndexType, IndexQuery, IndexQueryResult, IndexConfig, IndexStats, IndexError}; 7 + use crate::index::{IndexManager as MemoryIndexManager, CompositeIndex, composite_index::CompositeIndexQuery}; 8 + use serde::{Serialize, Deserialize}; 9 + use tracing::{debug, warn, info}; 10 + use roaring::RoaringBitmap; 11 + use dashmap::DashMap; 12 + 13 + /// Serializable representation of index data 14 + #[derive(Debug, Clone, Serialize, Deserialize)] 15 + pub struct SerializableIndexData { 16 + pub property_indexes: HashMap<u64, Vec<u8>>, // PropertyKeyId -> serialized PropertyValueIndex 17 + pub label_indexes: HashMap<u64, Vec<u8>>, // LabelId -> serialized LabelIndex 18 + pub composite_indexes: HashMap<Vec<u64>, Vec<u8>>, // PropertyKeyIds -> serialized CompositeIndex 19 + pub global_label_index: Vec<u8>, // serialized global LabelIndex 20 + } 21 + 22 + /// Persistent index manager that can save/load indexes to/from storage 23 + pub struct PersistentIndexManager { 24 + /// In-memory index manager for fast access 25 + memory_manager: Arc<MemoryIndexManager>, 26 + 27 + /// Storage engine for persistence 28 + storage: Arc<dyn StorageEngine>, 29 + 30 + /// Flag to track if indexes have been loaded from storage 31 + loaded: Arc<parking_lot::RwLock<bool>>, 32 + } 33 + 34 + impl PersistentIndexManager { 35 + /// Create a new persistent index manager 36 + pub fn new(storage: Arc<dyn StorageEngine>) -> Self { 37 + Self { 38 + memory_manager: Arc::new(MemoryIndexManager::new()), 39 + storage, 40 + loaded: Arc::new(parking_lot::RwLock::new(false)), 41 + } 42 + } 43 + 44 + /// Create a new persistent index manager with existing memory manager 45 + pub fn with_memory_manager( 46 + memory_manager: Arc<MemoryIndexManager>, 47 + storage: Arc<dyn StorageEngine> 48 + ) -> 
Self { 49 + Self { 50 + memory_manager, 51 + storage, 52 + loaded: Arc::new(parking_lot::RwLock::new(false)), 53 + } 54 + } 55 + 56 + /// Load indexes from storage if not already loaded 57 + pub async fn ensure_loaded(&self) -> Result<()> { 58 + let mut loaded = self.loaded.write(); 59 + if !*loaded { 60 + self.load_indexes().await?; 61 + *loaded = true; 62 + } 63 + Ok(()) 64 + } 65 + 66 + /// Load all indexes from storage 67 + async fn load_indexes(&self) -> Result<()> { 68 + info!("Loading indexes from storage"); 69 + 70 + // Load index configurations 71 + let config_names = self.storage.list_index_configs().await?; 72 + debug!("Found {} index configurations", config_names.len()); 73 + 74 + for config_name in config_names { 75 + if let Some(config_data) = self.storage.get_index_config(&config_name).await? { 76 + match bincode::deserialize::<IndexConfig>(&config_data) { 77 + Ok(config) => { 78 + if let Err(e) = self.memory_manager.create_index( 79 + config.index_type.clone(), 80 + Some(config_name.clone()), 81 + config.unique 82 + ) { 83 + warn!("Failed to recreate index {}: {}", config_name, e); 84 + } else { 85 + debug!("Recreated index: {}", config_name); 86 + } 87 + }, 88 + Err(e) => { 89 + warn!("Failed to deserialize index config {}: {}", config_name, e); 90 + } 91 + } 92 + } 93 + } 94 + 95 + // Load index data 96 + let data_keys = self.storage.list_index_data_keys("").await?; 97 + debug!("Found {} index data entries", data_keys.len()); 98 + 99 + // For now, we'll just log that we found the data 100 + // In a full implementation, we'd need to deserialize and load the actual index structures 101 + for data_key in data_keys { 102 + debug!("Found index data: {}", data_key); 103 + } 104 + 105 + info!("Completed loading indexes from storage"); 106 + Ok(()) 107 + } 108 + 109 + /// Save all indexes to storage 110 + pub async fn save_indexes(&self) -> Result<()> { 111 + debug!("Saving indexes to storage"); 112 + 113 + // Save index configurations 114 + let configs = 
self.memory_manager.list_indexes(); 115 + for config in configs { 116 + if let Some(name) = &config.name { 117 + let config_data = bincode::serialize(&config) 118 + .map_err(|e| IndexError::Storage( 119 + crate::GigabrainError::Storage(format!("Failed to serialize config: {}", e)) 120 + ))?; 121 + 122 + self.storage.put_index_config(name, &config_data).await?; 123 + debug!("Saved index config: {}", name); 124 + } 125 + } 126 + 127 + // Save index statistics 128 + let stats = self.memory_manager.get_index_stats(); 129 + for (index_name, stat) in stats { 130 + let stats_key = format!("stats_{}", index_name); 131 + let stats_data = bincode::serialize(&stat) 132 + .map_err(|e| IndexError::Storage( 133 + crate::GigabrainError::Storage(format!("Failed to serialize stats: {}", e)) 134 + ))?; 135 + 136 + self.storage.put_index_data(&stats_key, &stats_data).await?; 137 + debug!("Saved index stats: {}", stats_key); 138 + } 139 + 140 + debug!("Completed saving indexes to storage"); 141 + Ok(()) 142 + } 143 + 144 + /// Get the underlying memory manager (for delegation) 145 + pub fn memory_manager(&self) -> &MemoryIndexManager { 146 + &self.memory_manager 147 + } 148 + 149 + /// Create a new index and persist the configuration 150 + pub async fn create_index(&self, index_type: IndexType, name: Option<String>, unique: bool) -> Result<String> { 151 + self.ensure_loaded().await?; 152 + 153 + let index_name = self.memory_manager.create_index(index_type, name, unique)?; 154 + 155 + // Persist the configuration 156 + let configs = self.memory_manager.list_indexes(); 157 + if let Some(config) = configs.iter().find(|c| c.name.as_ref() == Some(&index_name)) { 158 + let config_data = bincode::serialize(config) 159 + .map_err(|e| IndexError::Storage( 160 + crate::GigabrainError::Storage(format!("Failed to serialize config: {}", e)) 161 + ))?; 162 + 163 + self.storage.put_index_config(&index_name, &config_data).await?; 164 + debug!("Persisted index config: {}", index_name); 165 + } 166 + 
167 + Ok(index_name) 168 + } 169 + 170 + /// Drop an index and remove it from storage 171 + pub async fn drop_index(&self, name: &str) -> Result<()> { 172 + self.ensure_loaded().await?; 173 + 174 + self.memory_manager.drop_index(name)?; 175 + 176 + // Remove from storage 177 + self.storage.delete_index_config(name).await?; 178 + 179 + // Remove associated data 180 + let data_keys = self.storage.list_index_data_keys(&format!("stats_{}", name)).await?; 181 + for key in data_keys { 182 + self.storage.delete_index_data(&key).await?; 183 + } 184 + 185 + debug!("Removed index from storage: {}", name); 186 + Ok(()) 187 + } 188 + 189 + /// Add a node to indexes and persist changes 190 + pub async fn add_node(&self, node_id: NodeId, labels: &[LabelId], properties: &HashMap<PropertyKeyId, PropertyValue>) -> Result<()> { 191 + self.ensure_loaded().await?; 192 + 193 + self.memory_manager.add_node(node_id, labels, properties)?; 194 + 195 + // For performance, we might want to batch these writes or do them asynchronously 196 + // For now, we'll just trigger a save periodically rather than on every operation 197 + 198 + Ok(()) 199 + } 200 + 201 + /// Remove a node from indexes and persist changes 202 + pub async fn remove_node(&self, node_id: NodeId) -> Result<()> { 203 + self.ensure_loaded().await?; 204 + 205 + self.memory_manager.remove_node(node_id)?; 206 + 207 + Ok(()) 208 + } 209 + 210 + /// Update node properties in indexes and persist changes 211 + pub async fn update_node_properties(&self, node_id: NodeId, old_properties: &HashMap<PropertyKeyId, PropertyValue>, new_properties: &HashMap<PropertyKeyId, PropertyValue>) -> Result<()> { 212 + self.ensure_loaded().await?; 213 + 214 + self.memory_manager.update_node_properties(node_id, old_properties, new_properties)?; 215 + 216 + Ok(()) 217 + } 218 + 219 + /// Query an index 220 + pub async fn query(&self, index_type: &IndexType, query: &IndexQuery) -> Result<IndexQueryResult> { 221 + self.ensure_loaded().await?; 222 + 223 + 
self.memory_manager.query(index_type, query) 224 + } 225 + 226 + /// Get nodes by label 227 + pub async fn get_nodes_by_label(&self, label_id: LabelId) -> Result<Vec<NodeId>> { 228 + self.ensure_loaded().await?; 229 + 230 + self.memory_manager.get_nodes_by_label(label_id) 231 + } 232 + 233 + /// Get nodes by property 234 + pub async fn get_nodes_by_property(&self, property_key_id: PropertyKeyId, value: &PropertyValue) -> Result<Vec<NodeId>> { 235 + self.ensure_loaded().await?; 236 + 237 + self.memory_manager.get_nodes_by_property(property_key_id, value) 238 + } 239 + 240 + /// Get nodes by composite key 241 + pub async fn get_nodes_by_composite_key(&self, property_key_ids: &[PropertyKeyId], values: &[PropertyValue]) -> Result<Vec<NodeId>> { 242 + self.ensure_loaded().await?; 243 + 244 + self.memory_manager.get_nodes_by_composite_key(property_key_ids, values) 245 + } 246 + 247 + /// Query composite index with prefix 248 + pub async fn query_composite_prefix(&self, property_key_ids: &[PropertyKeyId], partial_values: &[PropertyValue]) -> Result<Vec<NodeId>> { 249 + self.ensure_loaded().await?; 250 + 251 + self.memory_manager.query_composite_prefix(property_key_ids, partial_values) 252 + } 253 + 254 + /// List all available indexes 255 + pub async fn list_indexes(&self) -> Result<Vec<IndexConfig>> { 256 + self.ensure_loaded().await?; 257 + 258 + Ok(self.memory_manager.list_indexes()) 259 + } 260 + 261 + /// Get statistics for all indexes 262 + pub async fn get_index_stats(&self) -> Result<HashMap<String, IndexStats>> { 263 + self.ensure_loaded().await?; 264 + 265 + Ok(self.memory_manager.get_index_stats()) 266 + } 267 + 268 + /// Force a save of all indexes to storage 269 + pub async fn flush(&self) -> Result<()> { 270 + self.save_indexes().await?; 271 + self.storage.flush().await?; 272 + Ok(()) 273 + } 274 + 275 + /// Compact storage 276 + pub async fn compact(&self) -> Result<()> { 277 + self.storage.compact().await?; 278 + Ok(()) 279 + } 280 + } 281 + 282 + 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::storage::MemoryStore;
    use crate::core::PropertyValue;
    use std::collections::HashMap;

    /// Exercises the create -> list -> add -> query -> drop life cycle of one property index.
    #[tokio::test]
    async fn test_persistent_index_manager_basic() {
        let manager = PersistentIndexManager::new(Arc::new(MemoryStore::new()));
        let key = PropertyKeyId(1);

        // Creating a named index hands the chosen name back.
        let created = manager
            .create_index(IndexType::Property(key), Some("test_index".to_string()), false)
            .await
            .unwrap();
        assert_eq!(created, "test_index");

        // The new index is the only one listed.
        let listed = manager.list_indexes().await.unwrap();
        assert_eq!(listed.len(), 1);
        assert_eq!(listed[0].name.as_deref(), Some("test_index"));

        // Index a single node carrying the indexed property.
        let node = NodeId(1);
        let node_labels = vec![LabelId(1)];
        let mut node_properties = HashMap::new();
        node_properties.insert(key, PropertyValue::String("test".to_string()));
        manager.add_node(node, &node_labels, &node_properties).await.unwrap();

        // Looking the value up returns exactly that node.
        let wanted = PropertyValue::String("test".to_string());
        let matches = manager.get_nodes_by_property(key, &wanted).await.unwrap();
        assert_eq!(matches, vec![node]);

        // Dropping the index leaves nothing behind.
        manager.drop_index("test_index").await.unwrap();
        assert!(manager.list_indexes().await.unwrap().is_empty());
    }

    /// An index flushed by one manager is visible to a second manager built on the same storage.
    #[tokio::test]
    async fn test_persistent_index_manager_save_load() {
        let storage = Arc::new(MemoryStore::new());

        // First manager: create the index and force it out to storage.
        {
            let writer = PersistentIndexManager::new(storage.clone());
            writer
                .create_index(
                    IndexType::Property(PropertyKeyId(1)),
                    Some("persistent_test".to_string()),
                    false,
                )
                .await
                .unwrap();
            writer.flush().await.unwrap();
        }

        // Second manager: same storage, so the index definition must be found again.
        {
            let reader = PersistentIndexManager::new(storage);
            let restored = reader.list_indexes().await.unwrap();
            assert_eq!(restored.len(), 1);
            assert_eq!(restored[0].name.as_deref(), Some("persistent_test"));
        }
    }
}
+458
src/index/property_index.rs
··· 1 + use std::collections::BTreeMap; 2 + use std::sync::Arc; 3 + use roaring::RoaringBitmap; 4 + use dashmap::DashMap; 5 + use parking_lot::RwLock; 6 + use crate::{NodeId, PropertyKeyId, Result}; 7 + use crate::core::PropertyValue; 8 + use crate::index::types::{IndexQuery, IndexQueryResult, IndexQueryStats, IndexError, IndexStats}; 9 + use chrono; 10 + use tracing::{debug, warn, instrument}; 11 + use std::time::Instant; 12 + 13 + /// Efficient property value index using multiple data structures for different query types 14 + pub struct PropertyValueIndex { 15 + /// Hash index for exact value lookups - O(1) average case 16 + exact_index: Arc<DashMap<PropertyValue, RoaringBitmap>>, 17 + 18 + /// Range index for numeric/ordered values - O(log n) range queries 19 + range_index: Arc<RwLock<BTreeMap<PropertyValue, RoaringBitmap>>>, 20 + 21 + /// Prefix index for string prefix matching 22 + prefix_index: Arc<DashMap<String, RoaringBitmap>>, 23 + 24 + /// Statistics tracking 25 + stats: Arc<RwLock<IndexStats>>, 26 + 27 + /// Property key this index covers 28 + property_key: PropertyKeyId, 29 + 30 + /// Configuration 31 + unique: bool, 32 + } 33 + 34 + impl PropertyValueIndex { 35 + pub fn new(property_key: PropertyKeyId, unique: bool) -> Self { 36 + Self { 37 + exact_index: Arc::new(DashMap::new()), 38 + range_index: Arc::new(RwLock::new(BTreeMap::new())), 39 + prefix_index: Arc::new(DashMap::new()), 40 + stats: Arc::new(RwLock::new(IndexStats::default())), 41 + property_key, 42 + unique, 43 + } 44 + } 45 + 46 + /// Add a node to the index for a specific property value 47 + #[instrument(skip(self), level = "debug")] 48 + pub fn add_node(&self, node_id: NodeId, value: &PropertyValue) -> Result<()> { 49 + let start = Instant::now(); 50 + 51 + // Check unique constraint if enabled 52 + if self.unique && self.has_value(value)? 
{ 53 + return Err(IndexError::UniqueConstraintViolation( 54 + format!("Property {} with value {:?}", self.property_key.0, value) 55 + ).into()); 56 + } 57 + 58 + // Add to exact index 59 + self.exact_index 60 + .entry(value.clone()) 61 + .or_insert_with(RoaringBitmap::new) 62 + .insert(node_id.0 as u32); 63 + 64 + // Add to range index if the value is orderable 65 + if self.is_orderable(value) { 66 + let mut range_index = self.range_index.write(); 67 + range_index 68 + .entry(value.clone()) 69 + .or_insert_with(RoaringBitmap::new) 70 + .insert(node_id.0 as u32); 71 + } 72 + 73 + // Add to prefix index for strings 74 + if let PropertyValue::String(s) = value { 75 + self.add_string_prefixes(node_id, s); 76 + } 77 + 78 + // Update statistics 79 + self.update_stats_add(); 80 + 81 + debug!( 82 + property_key = %self.property_key.0, 83 + node_id = %node_id.0, 84 + value = ?value, 85 + duration_micros = start.elapsed().as_micros(), 86 + "Added node to property index" 87 + ); 88 + 89 + Ok(()) 90 + } 91 + 92 + /// Remove a node from the index for a specific property value 93 + #[instrument(skip(self), level = "debug")] 94 + pub fn remove_node(&self, node_id: NodeId, value: &PropertyValue) -> Result<()> { 95 + // Remove from exact index 96 + if let Some(mut bitmap) = self.exact_index.get_mut(value) { 97 + bitmap.remove(node_id.0 as u32); 98 + if bitmap.is_empty() { 99 + drop(bitmap); 100 + self.exact_index.remove(value); 101 + } 102 + } 103 + 104 + // Remove from range index 105 + if self.is_orderable(value) { 106 + let mut range_index = self.range_index.write(); 107 + if let Some(bitmap) = range_index.get_mut(value) { 108 + bitmap.remove(node_id.0 as u32); 109 + if bitmap.is_empty() { 110 + range_index.remove(value); 111 + } 112 + } 113 + } 114 + 115 + // Remove from prefix index for strings 116 + if let PropertyValue::String(s) = value { 117 + self.remove_string_prefixes(node_id, s); 118 + } 119 + 120 + // Update statistics 121 + self.update_stats_remove(); 122 + 123 + 
debug!( 124 + property_key = %self.property_key.0, 125 + node_id = %node_id.0, 126 + value = ?value, 127 + "Removed node from property index" 128 + ); 129 + 130 + Ok(()) 131 + } 132 + 133 + /// Query the index 134 + #[instrument(skip(self), level = "debug")] 135 + pub fn query(&self, query: &IndexQuery) -> Result<IndexQueryResult> { 136 + let start = Instant::now(); 137 + let mut stats = IndexQueryStats::default(); 138 + stats.index_used = true; 139 + 140 + let nodes = match query { 141 + IndexQuery::Exact(value) => { 142 + debug!("Executing exact query for value: {:?}", value); 143 + self.query_exact(value)? 144 + }, 145 + IndexQuery::Range { min, max, inclusive_min, inclusive_max } => { 146 + debug!("Executing range query: {:?} to {:?}", min, max); 147 + self.query_range(min.as_ref(), max.as_ref(), *inclusive_min, *inclusive_max)? 148 + }, 149 + IndexQuery::In(values) => { 150 + debug!("Executing IN query for {} values", values.len()); 151 + self.query_in(values)? 152 + }, 153 + IndexQuery::Prefix(prefix) => { 154 + debug!("Executing prefix query for: '{}'", prefix); 155 + self.query_prefix(prefix)? 156 + }, 157 + IndexQuery::Exists => { 158 + debug!("Executing EXISTS query"); 159 + self.query_exists()? 
160 + }, 161 + _ => { 162 + return Err(IndexError::InvalidQuery( 163 + format!("Query type not supported by property index: {}", query) 164 + ).into()); 165 + } 166 + }; 167 + 168 + stats.execution_time_micros = start.elapsed().as_micros() as u64; 169 + stats.nodes_returned = nodes.len() as u64; 170 + 171 + // Update query statistics 172 + self.update_query_stats(); 173 + 174 + debug!( 175 + property_key = %self.property_key.0, 176 + query = %query, 177 + nodes_returned = nodes.len(), 178 + duration_micros = stats.execution_time_micros, 179 + "Property index query completed" 180 + ); 181 + 182 + Ok(IndexQueryResult { nodes, stats }) 183 + } 184 + 185 + /// Check if a value exists in the index 186 + pub fn has_value(&self, value: &PropertyValue) -> Result<bool> { 187 + Ok(self.exact_index.get(value).map_or(false, |bitmap| !bitmap.is_empty())) 188 + } 189 + 190 + /// Get current index statistics 191 + pub fn get_stats(&self) -> IndexStats { 192 + self.stats.read().clone() 193 + } 194 + 195 + /// Rebuild the entire index (useful for optimization) 196 + pub fn rebuild(&self) -> Result<()> { 197 + debug!(property_key = %self.property_key.0, "Rebuilding property index"); 198 + 199 + // Clear all indexes 200 + self.exact_index.clear(); 201 + self.range_index.write().clear(); 202 + self.prefix_index.clear(); 203 + 204 + // Reset statistics 205 + let mut stats = self.stats.write(); 206 + stats.last_rebuild = Some(chrono::Utc::now()); 207 + stats.total_nodes = 0; 208 + stats.unique_values = 0; 209 + 210 + Ok(()) 211 + } 212 + 213 + // Private helper methods 214 + 215 + fn query_exact(&self, value: &PropertyValue) -> Result<Vec<NodeId>> { 216 + Ok(self.exact_index 217 + .get(value) 218 + .map(|bitmap| bitmap.iter().map(|id| NodeId(id as u64)).collect()) 219 + .unwrap_or_default()) 220 + } 221 + 222 + fn query_range( 223 + &self, 224 + min: Option<&PropertyValue>, 225 + max: Option<&PropertyValue>, 226 + inclusive_min: bool, 227 + inclusive_max: bool, 228 + ) -> 
Result<Vec<NodeId>> { 229 + let range_index = self.range_index.read(); 230 + let mut result = RoaringBitmap::new(); 231 + 232 + // Collect matching entries based on range criteria 233 + for (key, bitmap) in range_index.iter() { 234 + let mut include = true; 235 + 236 + // Check minimum bound 237 + if let Some(min_val) = min { 238 + if inclusive_min { 239 + if key < min_val { 240 + include = false; 241 + } 242 + } else { 243 + if key <= min_val { 244 + include = false; 245 + } 246 + } 247 + } 248 + 249 + // Check maximum bound 250 + if include { 251 + if let Some(max_val) = max { 252 + if inclusive_max { 253 + if key > max_val { 254 + include = false; 255 + } 256 + } else { 257 + if key >= max_val { 258 + include = false; 259 + } 260 + } 261 + } 262 + } 263 + 264 + if include { 265 + result |= bitmap; 266 + } 267 + } 268 + 269 + Ok(result.iter().map(|id| NodeId(id as u64)).collect()) 270 + } 271 + 272 + fn query_in(&self, values: &[PropertyValue]) -> Result<Vec<NodeId>> { 273 + let mut result = RoaringBitmap::new(); 274 + 275 + for value in values { 276 + if let Some(bitmap) = self.exact_index.get(value) { 277 + result |= &*bitmap; 278 + } 279 + } 280 + 281 + Ok(result.iter().map(|id| NodeId(id as u64)).collect()) 282 + } 283 + 284 + fn query_prefix(&self, prefix: &str) -> Result<Vec<NodeId>> { 285 + let mut result = RoaringBitmap::new(); 286 + 287 + // Find all prefixes that start with the given prefix 288 + for entry in self.prefix_index.iter() { 289 + if entry.key().starts_with(prefix) { 290 + result |= &*entry.value(); 291 + } 292 + } 293 + 294 + Ok(result.iter().map(|id| NodeId(id as u64)).collect()) 295 + } 296 + 297 + fn query_exists(&self) -> Result<Vec<NodeId>> { 298 + let mut result = RoaringBitmap::new(); 299 + 300 + for entry in self.exact_index.iter() { 301 + result |= &*entry.value(); 302 + } 303 + 304 + Ok(result.iter().map(|id| NodeId(id as u64)).collect()) 305 + } 306 + 307 + fn add_string_prefixes(&self, node_id: NodeId, s: &str) { 308 + // Add 
prefixes of increasing length for efficient prefix matching 309 + for i in 1..=std::cmp::min(s.len(), 10) { // Limit prefix length to avoid explosion 310 + let prefix = s.chars().take(i).collect::<String>(); 311 + self.prefix_index 312 + .entry(prefix) 313 + .or_insert_with(RoaringBitmap::new) 314 + .insert(node_id.0 as u32); 315 + } 316 + } 317 + 318 + fn remove_string_prefixes(&self, node_id: NodeId, s: &str) { 319 + for i in 1..=std::cmp::min(s.len(), 10) { 320 + let prefix = s.chars().take(i).collect::<String>(); 321 + if let Some(mut bitmap) = self.prefix_index.get_mut(&prefix) { 322 + bitmap.remove(node_id.0 as u32); 323 + if bitmap.is_empty() { 324 + drop(bitmap); 325 + self.prefix_index.remove(&prefix); 326 + } 327 + } 328 + } 329 + } 330 + 331 + fn is_orderable(&self, value: &PropertyValue) -> bool { 332 + matches!(value, 333 + PropertyValue::Integer(_) | 334 + PropertyValue::Float(_) | 335 + PropertyValue::String(_) | 336 + PropertyValue::Boolean(_) 337 + ) 338 + } 339 + 340 + fn update_stats_add(&self) { 341 + let mut stats = self.stats.write(); 342 + stats.total_nodes += 1; 343 + stats.last_updated = chrono::Utc::now(); 344 + } 345 + 346 + fn update_stats_remove(&self) { 347 + let mut stats = self.stats.write(); 348 + if stats.total_nodes > 0 { 349 + stats.total_nodes -= 1; 350 + } 351 + stats.last_updated = chrono::Utc::now(); 352 + } 353 + 354 + fn update_query_stats(&self) { 355 + let mut stats = self.stats.write(); 356 + stats.query_count += 1; 357 + // Simple hit rate calculation (could be more sophisticated) 358 + stats.hit_rate = stats.query_count as f64 / (stats.query_count + 1) as f64; 359 + } 360 + } 361 + 362 + #[cfg(test)] 363 + mod tests { 364 + use super::*; 365 + 366 + #[test] 367 + fn test_exact_query() { 368 + let index = PropertyValueIndex::new(PropertyKeyId(0), false); 369 + let node1 = NodeId(1); 370 + let node2 = NodeId(2); 371 + let value = PropertyValue::String("test".to_string()); 372 + 373 + // Add nodes 374 + 
index.add_node(node1, &value).unwrap(); 375 + index.add_node(node2, &value).unwrap(); 376 + 377 + // Query 378 + let result = index.query(&IndexQuery::Exact(value.clone())).unwrap(); 379 + assert_eq!(result.nodes.len(), 2); 380 + assert!(result.nodes.contains(&node1)); 381 + assert!(result.nodes.contains(&node2)); 382 + } 383 + 384 + #[test] 385 + fn test_range_query() { 386 + let index = PropertyValueIndex::new(PropertyKeyId(0), false); 387 + let node1 = NodeId(1); 388 + let node2 = NodeId(2); 389 + let node3 = NodeId(3); 390 + 391 + index.add_node(node1, &PropertyValue::Integer(10)).unwrap(); 392 + index.add_node(node2, &PropertyValue::Integer(20)).unwrap(); 393 + index.add_node(node3, &PropertyValue::Integer(30)).unwrap(); 394 + 395 + // Range query [15, 25] 396 + let result = index.query(&IndexQuery::Range { 397 + min: Some(PropertyValue::Integer(15)), 398 + max: Some(PropertyValue::Integer(25)), 399 + inclusive_min: true, 400 + inclusive_max: true, 401 + }).unwrap(); 402 + 403 + assert_eq!(result.nodes.len(), 1); 404 + assert!(result.nodes.contains(&node2)); 405 + } 406 + 407 + #[test] 408 + fn test_prefix_query() { 409 + let index = PropertyValueIndex::new(PropertyKeyId(0), false); 410 + let node1 = NodeId(1); 411 + let node2 = NodeId(2); 412 + 413 + index.add_node(node1, &PropertyValue::String("hello".to_string())).unwrap(); 414 + index.add_node(node2, &PropertyValue::String("help".to_string())).unwrap(); 415 + 416 + // Prefix query for "hel" 417 + let result = index.query(&IndexQuery::Prefix("hel".to_string())).unwrap(); 418 + assert_eq!(result.nodes.len(), 2); 419 + assert!(result.nodes.contains(&node1)); 420 + assert!(result.nodes.contains(&node2)); 421 + } 422 + 423 + #[test] 424 + fn test_unique_constraint() { 425 + let index = PropertyValueIndex::new(PropertyKeyId(0), true); 426 + let node1 = NodeId(1); 427 + let node2 = NodeId(2); 428 + let value = PropertyValue::String("unique".to_string()); 429 + 430 + // First insertion should succeed 431 + 
assert!(index.add_node(node1, &value).is_ok()); 432 + 433 + // Second insertion should fail due to unique constraint 434 + assert!(index.add_node(node2, &value).is_err()); 435 + } 436 + 437 + #[test] 438 + fn test_in_query() { 439 + let index = PropertyValueIndex::new(PropertyKeyId(0), false); 440 + let node1 = NodeId(1); 441 + let node2 = NodeId(2); 442 + let node3 = NodeId(3); 443 + 444 + index.add_node(node1, &PropertyValue::String("a".to_string())).unwrap(); 445 + index.add_node(node2, &PropertyValue::String("b".to_string())).unwrap(); 446 + index.add_node(node3, &PropertyValue::String("c".to_string())).unwrap(); 447 + 448 + // IN query for ["a", "c"] 449 + let result = index.query(&IndexQuery::In(vec![ 450 + PropertyValue::String("a".to_string()), 451 + PropertyValue::String("c".to_string()), 452 + ])).unwrap(); 453 + 454 + assert_eq!(result.nodes.len(), 2); 455 + assert!(result.nodes.contains(&node1)); 456 + assert!(result.nodes.contains(&node3)); 457 + } 458 + }
+184
src/index/types.rs
··· 1 + use std::fmt; 2 + use serde::{Serialize, Deserialize}; 3 + use crate::{NodeId, PropertyKeyId, LabelId}; 4 + use crate::core::PropertyValue; 5 + 6 + /// Types of indexes supported by GigaBrain 7 + #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] 8 + pub enum IndexType { 9 + /// Index on node labels 10 + Label(LabelId), 11 + /// Index on a single property 12 + Property(PropertyKeyId), 13 + /// Composite index on multiple properties 14 + Composite(Vec<PropertyKeyId>), 15 + /// Full-text search index 16 + FullText(PropertyKeyId), 17 + /// Range index for numeric/date properties 18 + Range(PropertyKeyId), 19 + } 20 + 21 + impl fmt::Display for IndexType { 22 + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 23 + match self { 24 + IndexType::Label(label_id) => write!(f, "Label({})", label_id.0), 25 + IndexType::Property(prop_id) => write!(f, "Property({})", prop_id.0), 26 + IndexType::Composite(prop_ids) => { 27 + write!(f, "Composite([{}])", 28 + prop_ids.iter() 29 + .map(|id| id.0.to_string()) 30 + .collect::<Vec<_>>() 31 + .join(", ")) 32 + }, 33 + IndexType::FullText(prop_id) => write!(f, "FullText({})", prop_id.0), 34 + IndexType::Range(prop_id) => write!(f, "Range({})", prop_id.0), 35 + } 36 + } 37 + } 38 + 39 + /// Index configuration and metadata 40 + #[derive(Debug, Clone, Serialize, Deserialize)] 41 + pub struct IndexConfig { 42 + pub index_type: IndexType, 43 + pub name: Option<String>, 44 + pub unique: bool, 45 + pub created_at: chrono::DateTime<chrono::Utc>, 46 + pub last_updated: chrono::DateTime<chrono::Utc>, 47 + pub node_count: u64, 48 + } 49 + 50 + impl IndexConfig { 51 + pub fn new(index_type: IndexType, name: Option<String>, unique: bool) -> Self { 52 + let now = chrono::Utc::now(); 53 + Self { 54 + index_type, 55 + name, 56 + unique, 57 + created_at: now, 58 + last_updated: now, 59 + node_count: 0, 60 + } 61 + } 62 + } 63 + 64 + /// Index statistics for monitoring and optimization 65 + #[derive(Debug, Clone, 
Serialize, Deserialize)] 66 + pub struct IndexStats { 67 + pub total_nodes: u64, 68 + pub unique_values: u64, 69 + pub memory_usage_bytes: u64, 70 + pub last_rebuild: Option<chrono::DateTime<chrono::Utc>>, 71 + pub last_updated: chrono::DateTime<chrono::Utc>, 72 + pub query_count: u64, 73 + pub hit_rate: f64, 74 + } 75 + 76 + impl Default for IndexStats { 77 + fn default() -> Self { 78 + Self { 79 + total_nodes: 0, 80 + unique_values: 0, 81 + memory_usage_bytes: 0, 82 + last_rebuild: None, 83 + last_updated: chrono::Utc::now(), 84 + query_count: 0, 85 + hit_rate: 0.0, 86 + } 87 + } 88 + } 89 + 90 + /// Query conditions for index lookups 91 + #[derive(Debug, Clone, PartialEq)] 92 + pub enum IndexQuery { 93 + /// Exact match for a single value 94 + Exact(PropertyValue), 95 + /// Range query (for numeric types) 96 + Range { 97 + min: Option<PropertyValue>, 98 + max: Option<PropertyValue>, 99 + inclusive_min: bool, 100 + inclusive_max: bool, 101 + }, 102 + /// Set membership query 103 + In(Vec<PropertyValue>), 104 + /// Prefix match (for strings) 105 + Prefix(String), 106 + /// Full-text search query 107 + FullText(String), 108 + /// Exists (property has any value) 109 + Exists, 110 + /// Composite query (for composite indexes) 111 + Composite(Vec<PropertyValue>), 112 + } 113 + 114 + impl fmt::Display for IndexQuery { 115 + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 116 + match self { 117 + IndexQuery::Exact(value) => write!(f, "= {:?}", value), 118 + IndexQuery::Range { min, max, inclusive_min, inclusive_max } => { 119 + let min_bracket = if *inclusive_min { "[" } else { "(" }; 120 + let max_bracket = if *inclusive_max { "]" } else { ")" }; 121 + write!(f, "{}{:?}, {:?}{}", min_bracket, min, max, max_bracket) 122 + }, 123 + IndexQuery::In(values) => write!(f, "IN {:?}", values), 124 + IndexQuery::Prefix(prefix) => write!(f, "PREFIX '{}'", prefix), 125 + IndexQuery::FullText(query) => write!(f, "FULLTEXT '{}'", query), 126 + IndexQuery::Exists => 
write!(f, "EXISTS"), 127 + IndexQuery::Composite(values) => write!(f, "COMPOSITE {:?}", values), 128 + } 129 + } 130 + } 131 + 132 + /// Result of an index query 133 + #[derive(Debug, Clone)] 134 + pub struct IndexQueryResult { 135 + pub nodes: Vec<NodeId>, 136 + pub stats: IndexQueryStats, 137 + } 138 + 139 + /// Statistics about an index query execution 140 + #[derive(Debug, Clone)] 141 + pub struct IndexQueryStats { 142 + pub index_used: bool, 143 + pub index_type: Option<IndexType>, 144 + pub execution_time_micros: u64, 145 + pub nodes_scanned: u64, 146 + pub nodes_returned: u64, 147 + } 148 + 149 + impl Default for IndexQueryStats { 150 + fn default() -> Self { 151 + Self { 152 + index_used: false, 153 + index_type: None, 154 + execution_time_micros: 0, 155 + nodes_scanned: 0, 156 + nodes_returned: 0, 157 + } 158 + } 159 + } 160 + 161 + /// Error types for indexing operations 162 + #[derive(Debug, thiserror::Error)] 163 + pub enum IndexError { 164 + #[error("Index {0} already exists")] 165 + AlreadyExists(String), 166 + 167 + #[error("Index {0} not found")] 168 + NotFound(String), 169 + 170 + #[error("Unsupported index type: {0}")] 171 + UnsupportedType(String), 172 + 173 + #[error("Invalid query for index type: {0}")] 174 + InvalidQuery(String), 175 + 176 + #[error("Unique constraint violation for index {0}")] 177 + UniqueConstraintViolation(String), 178 + 179 + #[error("Index rebuild failed: {0}")] 180 + RebuildFailed(String), 181 + 182 + #[error("Storage error: {0}")] 183 + Storage(#[from] crate::GigabrainError), 184 + }
+4 -1
src/lib.rs
··· 1 1 pub mod core; 2 2 pub mod storage; 3 + pub mod persistence; 3 4 pub mod cypher; 4 5 pub mod algorithms; 5 6 pub mod index; ··· 10 11 pub mod observability; 11 12 pub mod cli; 12 13 13 - pub use core::{Graph, Node, Relationship, Property}; 14 + pub use core::{Graph, PersistentGraph, Node, Relationship, Property}; 14 15 pub use error::{GigabrainError, Result}; 15 16 pub use observability::{ObservabilitySystem, HealthLevel, HealthStatus}; 17 + pub use persistence::{PersistentStorage, StorageBackend}; 18 + pub use index::{IndexManager, IndexType, IndexQuery, IndexConfig}; 16 19 use serde::{Serialize, Deserialize}; 17 20 18 21 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
+239
src/persistence/memory_store.rs
··· 1 + use crate::{Result, NodeId, RelationshipId}; 2 + use crate::core::{Node, Relationship, GraphSchema}; 3 + use crate::persistence::{StorageBackend, SerializableNode, SerializableRelationship}; 4 + use async_trait::async_trait; 5 + use std::collections::HashMap; 6 + use std::sync::RwLock; 7 + 8 + /// In-memory storage backend for testing and development 9 + pub struct MemoryStore { 10 + nodes: RwLock<HashMap<NodeId, Node>>, 11 + relationships: RwLock<HashMap<RelationshipId, Relationship>>, 12 + schema: RwLock<Option<GraphSchema>>, 13 + metadata: RwLock<HashMap<String, Vec<u8>>>, 14 + } 15 + 16 + impl MemoryStore { 17 + pub fn new() -> Self { 18 + Self { 19 + nodes: RwLock::new(HashMap::new()), 20 + relationships: RwLock::new(HashMap::new()), 21 + schema: RwLock::new(None), 22 + metadata: RwLock::new(HashMap::new()), 23 + } 24 + } 25 + } 26 + 27 + impl Default for MemoryStore { 28 + fn default() -> Self { 29 + Self::new() 30 + } 31 + } 32 + 33 + #[async_trait] 34 + impl StorageBackend for MemoryStore { 35 + async fn initialize(&self) -> Result<()> { 36 + tracing::info!("Memory storage initialized"); 37 + Ok(()) 38 + } 39 + 40 + async fn store_node(&self, node_id: NodeId, node: &Node) -> Result<()> { 41 + let mut nodes = self.nodes.write().unwrap(); 42 + nodes.insert(node_id, node.clone()); 43 + Ok(()) 44 + } 45 + 46 + async fn get_node(&self, node_id: NodeId) -> Result<Option<Node>> { 47 + let nodes = self.nodes.read().unwrap(); 48 + Ok(nodes.get(&node_id).cloned()) 49 + } 50 + 51 + async fn store_relationship(&self, rel_id: RelationshipId, relationship: &Relationship) -> Result<()> { 52 + let mut relationships = self.relationships.write().unwrap(); 53 + relationships.insert(rel_id, relationship.clone()); 54 + Ok(()) 55 + } 56 + 57 + async fn get_relationship(&self, rel_id: RelationshipId) -> Result<Option<Relationship>> { 58 + let relationships = self.relationships.read().unwrap(); 59 + Ok(relationships.get(&rel_id).cloned()) 60 + } 61 + 62 + async fn 
store_schema(&self, schema: &GraphSchema) -> Result<()> { 63 + let mut stored_schema = self.schema.write().unwrap(); 64 + *stored_schema = Some(schema.clone()); 65 + Ok(()) 66 + } 67 + 68 + async fn get_schema(&self) -> Result<Option<GraphSchema>> { 69 + let schema = self.schema.read().unwrap(); 70 + Ok(schema.clone()) 71 + } 72 + 73 + async fn get_all_node_ids(&self) -> Result<Vec<NodeId>> { 74 + let nodes = self.nodes.read().unwrap(); 75 + Ok(nodes.keys().copied().collect()) 76 + } 77 + 78 + async fn get_all_relationship_ids(&self) -> Result<Vec<RelationshipId>> { 79 + let relationships = self.relationships.read().unwrap(); 80 + Ok(relationships.keys().copied().collect()) 81 + } 82 + 83 + async fn delete_node(&self, node_id: NodeId) -> Result<()> { 84 + let mut nodes = self.nodes.write().unwrap(); 85 + nodes.remove(&node_id); 86 + Ok(()) 87 + } 88 + 89 + async fn delete_relationship(&self, rel_id: RelationshipId) -> Result<()> { 90 + let mut relationships = self.relationships.write().unwrap(); 91 + relationships.remove(&rel_id); 92 + Ok(()) 93 + } 94 + 95 + async fn store_metadata(&self, key: &str, value: &[u8]) -> Result<()> { 96 + let mut metadata = self.metadata.write().unwrap(); 97 + metadata.insert(key.to_string(), value.to_vec()); 98 + Ok(()) 99 + } 100 + 101 + async fn get_metadata(&self, key: &str) -> Result<Option<Vec<u8>>> { 102 + let metadata = self.metadata.read().unwrap(); 103 + Ok(metadata.get(key).cloned()) 104 + } 105 + 106 + async fn flush(&self) -> Result<()> { 107 + // No-op for memory store 108 + Ok(()) 109 + } 110 + 111 + async fn close(&self) -> Result<()> { 112 + tracing::info!("Closing memory storage"); 113 + Ok(()) 114 + } 115 + } 116 + 117 + #[cfg(test)] 118 + mod tests { 119 + use super::*; 120 + use crate::core::PropertyValue; 121 + use std::collections::HashMap; 122 + 123 + #[tokio::test] 124 + async fn test_memory_store_node_operations() -> Result<()> { 125 + let store = MemoryStore::new(); 126 + store.initialize().await?; 127 + 128 
+ // Create a test node 129 + let node_id = NodeId(42); 130 + let mut properties = HashMap::new(); 131 + properties.insert(crate::PropertyKeyId(0), PropertyValue::String("Alice".to_string())); 132 + properties.insert(crate::PropertyKeyId(1), PropertyValue::Integer(30)); 133 + 134 + let node = Node { 135 + id: node_id, 136 + labels: vec![crate::LabelId(0)], 137 + properties, 138 + }; 139 + 140 + // Store the node 141 + store.store_node(node_id, &node).await?; 142 + 143 + // Retrieve the node 144 + let retrieved = store.get_node(node_id).await?; 145 + assert!(retrieved.is_some()); 146 + let retrieved_node = retrieved.unwrap(); 147 + assert_eq!(retrieved_node.id, node.id); 148 + assert_eq!(retrieved_node.labels, node.labels); 149 + assert_eq!(retrieved_node.properties.len(), node.properties.len()); 150 + 151 + // Delete the node 152 + store.delete_node(node_id).await?; 153 + let deleted = store.get_node(node_id).await?; 154 + assert!(deleted.is_none()); 155 + 156 + Ok(()) 157 + } 158 + 159 + #[tokio::test] 160 + async fn test_memory_store_relationship_operations() -> Result<()> { 161 + let store = MemoryStore::new(); 162 + store.initialize().await?; 163 + 164 + // Create a test relationship 165 + let rel_id = RelationshipId(123); 166 + let mut properties = HashMap::new(); 167 + properties.insert(crate::PropertyKeyId(0), PropertyValue::String("since".to_string())); 168 + 169 + let relationship = Relationship { 170 + id: rel_id, 171 + start_node: NodeId(1), 172 + end_node: NodeId(2), 173 + rel_type: 0, 174 + properties, 175 + }; 176 + 177 + // Store the relationship 178 + store.store_relationship(rel_id, &relationship).await?; 179 + 180 + // Retrieve the relationship 181 + let retrieved = store.get_relationship(rel_id).await?; 182 + assert!(retrieved.is_some()); 183 + let retrieved_rel = retrieved.unwrap(); 184 + assert_eq!(retrieved_rel.id, relationship.id); 185 + assert_eq!(retrieved_rel.start_node, relationship.start_node); 186 + assert_eq!(retrieved_rel.end_node, 
relationship.end_node); 187 + 188 + // Delete the relationship 189 + store.delete_relationship(rel_id).await?; 190 + let deleted = store.get_relationship(rel_id).await?; 191 + assert!(deleted.is_none()); 192 + 193 + Ok(()) 194 + } 195 + 196 + #[tokio::test] 197 + async fn test_memory_store_metadata_operations() -> Result<()> { 198 + let store = MemoryStore::new(); 199 + store.initialize().await?; 200 + 201 + // Store metadata 202 + let key = "test_counter"; 203 + let value = 12345u64.to_be_bytes(); 204 + store.store_metadata(key, &value).await?; 205 + 206 + // Retrieve metadata 207 + let retrieved = store.get_metadata(key).await?; 208 + assert!(retrieved.is_some()); 209 + let retrieved_bytes = retrieved.unwrap(); 210 + assert_eq!(retrieved_bytes, value); 211 + 212 + Ok(()) 213 + } 214 + 215 + #[tokio::test] 216 + async fn test_memory_store_schema_operations() -> Result<()> { 217 + let store = MemoryStore::new(); 218 + store.initialize().await?; 219 + 220 + // Create a test schema 221 + let mut schema = GraphSchema::new(); 222 + schema.get_or_create_label("Person"); 223 + schema.get_or_create_property_key("name"); 224 + schema.get_or_create_relationship_type("KNOWS"); 225 + 226 + // Store the schema 227 + store.store_schema(&schema).await?; 228 + 229 + // Retrieve the schema 230 + let retrieved = store.get_schema().await?; 231 + assert!(retrieved.is_some()); 232 + let retrieved_schema = retrieved.unwrap(); 233 + assert_eq!(retrieved_schema.labels.len(), schema.labels.len()); 234 + assert_eq!(retrieved_schema.property_keys.len(), schema.property_keys.len()); 235 + assert_eq!(retrieved_schema.relationship_types.len(), schema.relationship_types.len()); 236 + 237 + Ok(()) 238 + } 239 + }
+295
src/persistence/mod.rs
··· 1 + use crate::{Result, GigabrainError, NodeId, RelationshipId}; 2 + use crate::core::{Node, Relationship, GraphSchema}; 3 + use async_trait::async_trait; 4 + use serde::{Serialize, Deserialize}; 5 + use std::sync::Arc; 6 + 7 + #[cfg(feature = "rocksdb-storage")] 8 + pub mod rocksdb_store; 9 + pub mod memory_store; 10 + 11 + pub use memory_store::MemoryStore; 12 + #[cfg(feature = "rocksdb-storage")] 13 + pub use rocksdb_store::RocksDBStore; 14 + 15 + /// Trait for persistent storage backends 16 + #[async_trait] 17 + pub trait StorageBackend: Send + Sync { 18 + /// Initialize the storage backend 19 + async fn initialize(&self) -> Result<()>; 20 + 21 + /// Store a node persistently 22 + async fn store_node(&self, node_id: NodeId, node: &Node) -> Result<()>; 23 + 24 + /// Retrieve a node by ID 25 + async fn get_node(&self, node_id: NodeId) -> Result<Option<Node>>; 26 + 27 + /// Store a relationship persistently 28 + async fn store_relationship(&self, rel_id: RelationshipId, relationship: &Relationship) -> Result<()>; 29 + 30 + /// Retrieve a relationship by ID 31 + async fn get_relationship(&self, rel_id: RelationshipId) -> Result<Option<Relationship>>; 32 + 33 + /// Store graph schema 34 + async fn store_schema(&self, schema: &GraphSchema) -> Result<()>; 35 + 36 + /// Retrieve graph schema 37 + async fn get_schema(&self) -> Result<Option<GraphSchema>>; 38 + 39 + /// Get all node IDs 40 + async fn get_all_node_ids(&self) -> Result<Vec<NodeId>>; 41 + 42 + /// Get all relationship IDs 43 + async fn get_all_relationship_ids(&self) -> Result<Vec<RelationshipId>>; 44 + 45 + /// Delete a node 46 + async fn delete_node(&self, node_id: NodeId) -> Result<()>; 47 + 48 + /// Delete a relationship 49 + async fn delete_relationship(&self, rel_id: RelationshipId) -> Result<()>; 50 + 51 + /// Store metadata (counters, etc.) 
52 + async fn store_metadata(&self, key: &str, value: &[u8]) -> Result<()>; 53 + 54 + /// Retrieve metadata 55 + async fn get_metadata(&self, key: &str) -> Result<Option<Vec<u8>>>; 56 + 57 + /// Flush all pending writes to disk 58 + async fn flush(&self) -> Result<()>; 59 + 60 + /// Close the storage backend 61 + async fn close(&self) -> Result<()>; 62 + } 63 + 64 + /// Storage keys for different data types 65 + #[derive(Debug, Clone, PartialEq, Eq, Hash)] 66 + pub enum StorageKey { 67 + Node(NodeId), 68 + Relationship(RelationshipId), 69 + Schema, 70 + NodeCounter, 71 + RelationshipCounter, 72 + Custom(String), 73 + } 74 + 75 + impl StorageKey { 76 + pub fn to_bytes(&self) -> Vec<u8> { 77 + match self { 78 + StorageKey::Node(id) => { 79 + let mut key = b"node:".to_vec(); 80 + key.extend_from_slice(&id.0.to_be_bytes()); 81 + key 82 + }, 83 + StorageKey::Relationship(id) => { 84 + let mut key = b"rel:".to_vec(); 85 + key.extend_from_slice(&id.0.to_be_bytes()); 86 + key 87 + }, 88 + StorageKey::Schema => b"schema".to_vec(), 89 + StorageKey::NodeCounter => b"meta:node_counter".to_vec(), 90 + StorageKey::RelationshipCounter => b"meta:rel_counter".to_vec(), 91 + StorageKey::Custom(s) => { 92 + let mut key = b"meta:".to_vec(); 93 + key.extend_from_slice(s.as_bytes()); 94 + key 95 + }, 96 + } 97 + } 98 + 99 + pub fn from_bytes(bytes: &[u8]) -> Result<Self> { 100 + if bytes.starts_with(b"node:") && bytes.len() == 13 { 101 + let id_bytes = &bytes[5..13]; 102 + let id = u64::from_be_bytes(id_bytes.try_into().map_err(|_| { 103 + GigabrainError::Storage("Invalid node ID in key".to_string()) 104 + })?); 105 + Ok(StorageKey::Node(NodeId(id))) 106 + } else if bytes.starts_with(b"rel:") && bytes.len() == 12 { 107 + let id_bytes = &bytes[4..12]; 108 + let id = u64::from_be_bytes(id_bytes.try_into().map_err(|_| { 109 + GigabrainError::Storage("Invalid relationship ID in key".to_string()) 110 + })?); 111 + Ok(StorageKey::Relationship(RelationshipId(id))) 112 + } else if bytes == 
b"schema" { 113 + Ok(StorageKey::Schema) 114 + } else if bytes == b"meta:node_counter" { 115 + Ok(StorageKey::NodeCounter) 116 + } else if bytes == b"meta:rel_counter" { 117 + Ok(StorageKey::RelationshipCounter) 118 + } else if bytes.starts_with(b"meta:") { 119 + let custom_part = String::from_utf8(bytes[5..].to_vec()).map_err(|_| { 120 + GigabrainError::Storage("Invalid UTF-8 in custom key".to_string()) 121 + })?; 122 + Ok(StorageKey::Custom(custom_part)) 123 + } else { 124 + Err(GigabrainError::Storage("Unknown key format".to_string())) 125 + } 126 + } 127 + } 128 + 129 + /// Serializable node data for storage 130 + #[derive(Debug, Clone, Serialize, Deserialize)] 131 + pub struct SerializableNode { 132 + pub id: NodeId, 133 + pub labels: Vec<crate::LabelId>, 134 + pub properties: std::collections::HashMap<crate::PropertyKeyId, crate::core::PropertyValue>, 135 + } 136 + 137 + impl From<&Node> for SerializableNode { 138 + fn from(node: &Node) -> Self { 139 + Self { 140 + id: node.id, 141 + labels: node.labels.clone(), 142 + properties: node.properties.clone(), 143 + } 144 + } 145 + } 146 + 147 + impl From<SerializableNode> for Node { 148 + fn from(ser_node: SerializableNode) -> Self { 149 + Self { 150 + id: ser_node.id, 151 + labels: ser_node.labels, 152 + properties: ser_node.properties, 153 + } 154 + } 155 + } 156 + 157 + /// Serializable relationship data for storage 158 + #[derive(Debug, Clone, Serialize, Deserialize)] 159 + pub struct SerializableRelationship { 160 + pub id: RelationshipId, 161 + pub start_node: NodeId, 162 + pub end_node: NodeId, 163 + pub rel_type: u32, 164 + pub properties: std::collections::HashMap<crate::PropertyKeyId, crate::core::PropertyValue>, 165 + } 166 + 167 + impl From<&Relationship> for SerializableRelationship { 168 + fn from(rel: &Relationship) -> Self { 169 + Self { 170 + id: rel.id, 171 + start_node: rel.start_node, 172 + end_node: rel.end_node, 173 + rel_type: rel.rel_type, 174 + properties: rel.properties.clone(), 175 + } 
176 + } 177 + } 178 + 179 + impl From<SerializableRelationship> for Relationship { 180 + fn from(ser_rel: SerializableRelationship) -> Self { 181 + Self { 182 + id: ser_rel.id, 183 + start_node: ser_rel.start_node, 184 + end_node: ser_rel.end_node, 185 + rel_type: ser_rel.rel_type, 186 + properties: ser_rel.properties, 187 + } 188 + } 189 + } 190 + 191 + /// Persistent storage manager 192 + pub struct PersistentStorage { 193 + backend: Arc<dyn StorageBackend>, 194 + } 195 + 196 + impl PersistentStorage { 197 + pub fn new(backend: Arc<dyn StorageBackend>) -> Self { 198 + Self { backend } 199 + } 200 + 201 + pub async fn initialize(&self) -> Result<()> { 202 + self.backend.initialize().await 203 + } 204 + 205 + pub async fn store_node(&self, node: &Node) -> Result<()> { 206 + self.backend.store_node(node.id, node).await 207 + } 208 + 209 + pub async fn get_node(&self, node_id: NodeId) -> Result<Option<Node>> { 210 + self.backend.get_node(node_id).await 211 + } 212 + 213 + pub async fn store_relationship(&self, relationship: &Relationship) -> Result<()> { 214 + self.backend.store_relationship(relationship.id, relationship).await 215 + } 216 + 217 + pub async fn get_relationship(&self, rel_id: RelationshipId) -> Result<Option<Relationship>> { 218 + self.backend.get_relationship(rel_id).await 219 + } 220 + 221 + pub async fn store_schema(&self, schema: &GraphSchema) -> Result<()> { 222 + self.backend.store_schema(schema).await 223 + } 224 + 225 + pub async fn get_schema(&self) -> Result<Option<GraphSchema>> { 226 + self.backend.get_schema().await 227 + } 228 + 229 + pub async fn get_all_node_ids(&self) -> Result<Vec<NodeId>> { 230 + self.backend.get_all_node_ids().await 231 + } 232 + 233 + pub async fn get_all_relationship_ids(&self) -> Result<Vec<RelationshipId>> { 234 + self.backend.get_all_relationship_ids().await 235 + } 236 + 237 + pub async fn delete_node(&self, node_id: NodeId) -> Result<()> { 238 + self.backend.delete_node(node_id).await 239 + } 240 + 241 + pub 
async fn delete_relationship(&self, rel_id: RelationshipId) -> Result<()> { 242 + self.backend.delete_relationship(rel_id).await 243 + } 244 + 245 + pub async fn store_counter(&self, key: &str, value: u64) -> Result<()> { 246 + let bytes = value.to_be_bytes(); 247 + self.backend.store_metadata(key, &bytes).await 248 + } 249 + 250 + pub async fn get_counter(&self, key: &str) -> Result<Option<u64>> { 251 + if let Some(bytes) = self.backend.get_metadata(key).await? { 252 + if bytes.len() == 8 { 253 + let value = u64::from_be_bytes(bytes.try_into().map_err(|_| { 254 + GigabrainError::Storage("Invalid counter format".to_string()) 255 + })?); 256 + Ok(Some(value)) 257 + } else { 258 + Err(GigabrainError::Storage("Invalid counter length".to_string())) 259 + } 260 + } else { 261 + Ok(None) 262 + } 263 + } 264 + 265 + pub async fn flush(&self) -> Result<()> { 266 + self.backend.flush().await 267 + } 268 + 269 + pub async fn close(&self) -> Result<()> { 270 + self.backend.close().await 271 + } 272 + } 273 + 274 + #[cfg(test)] 275 + mod tests { 276 + use super::*; 277 + 278 + #[test] 279 + fn test_storage_key_serialization() { 280 + let node_key = StorageKey::Node(NodeId(42)); 281 + let bytes = node_key.to_bytes(); 282 + let decoded = StorageKey::from_bytes(&bytes).unwrap(); 283 + assert_eq!(node_key, decoded); 284 + 285 + let rel_key = StorageKey::Relationship(RelationshipId(123)); 286 + let bytes = rel_key.to_bytes(); 287 + let decoded = StorageKey::from_bytes(&bytes).unwrap(); 288 + assert_eq!(rel_key, decoded); 289 + 290 + let schema_key = StorageKey::Schema; 291 + let bytes = schema_key.to_bytes(); 292 + let decoded = StorageKey::from_bytes(&bytes).unwrap(); 293 + assert_eq!(schema_key, decoded); 294 + } 295 + }
+425
src/persistence/rocksdb_store.rs
··· 1 + #[cfg(feature = "rocksdb-storage")] 2 + use crate::{Result, GigabrainError, NodeId, RelationshipId}; 3 + use crate::core::{Node, Relationship, GraphSchema}; 4 + use crate::persistence::{StorageBackend, StorageKey, SerializableNode, SerializableRelationship}; 5 + use async_trait::async_trait; 6 + use rocksdb::{DB, Options, ColumnFamily, ColumnFamilyDescriptor, WriteBatch}; 7 + use std::path::Path; 8 + use std::sync::Arc; 9 + use tokio::task; 10 + 11 + /// RocksDB storage backend implementation 12 + #[cfg(feature = "rocksdb-storage")] 13 + pub struct RocksDBStore { 14 + db: Arc<DB>, 15 + db_path: String, 16 + } 17 + 18 + #[cfg(feature = "rocksdb-storage")] 19 + impl RocksDBStore { 20 + /// Create a new RocksDB store 21 + pub fn new<P: AsRef<Path>>(db_path: P) -> Result<Self> { 22 + let path_str = db_path.as_ref().to_string_lossy().to_string(); 23 + 24 + // Configure RocksDB options 25 + let mut opts = Options::default(); 26 + opts.create_if_missing(true); 27 + opts.create_missing_column_families(true); 28 + 29 + // Configure column families 30 + let cf_descriptors = vec![ 31 + ColumnFamilyDescriptor::new("default", Options::default()), 32 + ColumnFamilyDescriptor::new("nodes", Options::default()), 33 + ColumnFamilyDescriptor::new("relationships", Options::default()), 34 + ColumnFamilyDescriptor::new("schema", Options::default()), 35 + ColumnFamilyDescriptor::new("metadata", Options::default()), 36 + ]; 37 + 38 + // Open database with column families 39 + let db = DB::open_cf_descriptors(&opts, &path_str, cf_descriptors) 40 + .map_err(|e| GigabrainError::Storage(format!("Failed to open RocksDB: {}", e)))?; 41 + 42 + Ok(Self { 43 + db: Arc::new(db), 44 + db_path: path_str, 45 + }) 46 + } 47 + 48 + /// Get column family handle 49 + fn get_cf(&self, name: &str) -> Result<&ColumnFamily> { 50 + self.db.cf_handle(name) 51 + .ok_or_else(|| GigabrainError::Storage(format!("Column family '{}' not found", name))) 52 + } 53 + 54 + /// Serialize data using bincode 55 + fn 
serialize<T: serde::Serialize>(&self, value: &T) -> Result<Vec<u8>> { 56 + bincode::serialize(value) 57 + .map_err(|e| GigabrainError::Storage(format!("Serialization failed: {}", e))) 58 + } 59 + 60 + /// Deserialize data using bincode 61 + fn deserialize<T: serde::de::DeserializeOwned>(&self, bytes: &[u8]) -> Result<T> { 62 + bincode::deserialize(bytes) 63 + .map_err(|e| GigabrainError::Storage(format!("Deserialization failed: {}", e))) 64 + } 65 + } 66 + 67 + #[cfg(feature = "rocksdb-storage")] 68 + #[async_trait] 69 + impl StorageBackend for RocksDBStore { 70 + async fn initialize(&self) -> Result<()> { 71 + // RocksDB is initialized in the constructor 72 + // This method can be used for any additional setup 73 + tracing::info!("RocksDB storage initialized at: {}", self.db_path); 74 + Ok(()) 75 + } 76 + 77 + async fn store_node(&self, node_id: NodeId, node: &Node) -> Result<()> { 78 + let cf = self.get_cf("nodes")?; 79 + let key = StorageKey::Node(node_id).to_bytes(); 80 + let serializable_node = SerializableNode::from(node); 81 + let value = self.serialize(&serializable_node)?; 82 + 83 + let db = Arc::clone(&self.db); 84 + task::spawn_blocking(move || { 85 + db.put_cf(cf, key, value) 86 + .map_err(|e| GigabrainError::Storage(format!("Failed to store node: {}", e))) 87 + }).await 88 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))??; 89 + 90 + Ok(()) 91 + } 92 + 93 + async fn get_node(&self, node_id: NodeId) -> Result<Option<Node>> { 94 + let cf = self.get_cf("nodes")?; 95 + let key = StorageKey::Node(node_id).to_bytes(); 96 + 97 + let db = Arc::clone(&self.db); 98 + let result = task::spawn_blocking(move || { 99 + db.get_cf(cf, key) 100 + .map_err(|e| GigabrainError::Storage(format!("Failed to get node: {}", e))) 101 + }).await 102 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))??; 103 + 104 + if let Some(bytes) = result { 105 + let serializable_node: SerializableNode = self.deserialize(&bytes)?; 106 + 
Ok(Some(Node::from(serializable_node))) 107 + } else { 108 + Ok(None) 109 + } 110 + } 111 + 112 + async fn store_relationship(&self, rel_id: RelationshipId, relationship: &Relationship) -> Result<()> { 113 + let cf = self.get_cf("relationships")?; 114 + let key = StorageKey::Relationship(rel_id).to_bytes(); 115 + let serializable_rel = SerializableRelationship::from(relationship); 116 + let value = self.serialize(&serializable_rel)?; 117 + 118 + let db = Arc::clone(&self.db); 119 + task::spawn_blocking(move || { 120 + db.put_cf(cf, key, value) 121 + .map_err(|e| GigabrainError::Storage(format!("Failed to store relationship: {}", e))) 122 + }).await 123 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))??; 124 + 125 + Ok(()) 126 + } 127 + 128 + async fn get_relationship(&self, rel_id: RelationshipId) -> Result<Option<Relationship>> { 129 + let cf = self.get_cf("relationships")?; 130 + let key = StorageKey::Relationship(rel_id).to_bytes(); 131 + 132 + let db = Arc::clone(&self.db); 133 + let result = task::spawn_blocking(move || { 134 + db.get_cf(cf, key) 135 + .map_err(|e| GigabrainError::Storage(format!("Failed to get relationship: {}", e))) 136 + }).await 137 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))??; 138 + 139 + if let Some(bytes) = result { 140 + let serializable_rel: SerializableRelationship = self.deserialize(&bytes)?; 141 + Ok(Some(Relationship::from(serializable_rel))) 142 + } else { 143 + Ok(None) 144 + } 145 + } 146 + 147 + async fn store_schema(&self, schema: &GraphSchema) -> Result<()> { 148 + let cf = self.get_cf("schema")?; 149 + let key = StorageKey::Schema.to_bytes(); 150 + let value = self.serialize(schema)?; 151 + 152 + let db = Arc::clone(&self.db); 153 + task::spawn_blocking(move || { 154 + db.put_cf(cf, key, value) 155 + .map_err(|e| GigabrainError::Storage(format!("Failed to store schema: {}", e))) 156 + }).await 157 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", 
e)))??; 158 + 159 + Ok(()) 160 + } 161 + 162 + async fn get_schema(&self) -> Result<Option<GraphSchema>> { 163 + let cf = self.get_cf("schema")?; 164 + let key = StorageKey::Schema.to_bytes(); 165 + 166 + let db = Arc::clone(&self.db); 167 + let result = task::spawn_blocking(move || { 168 + db.get_cf(cf, key) 169 + .map_err(|e| GigabrainError::Storage(format!("Failed to get schema: {}", e))) 170 + }).await 171 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))??; 172 + 173 + if let Some(bytes) = result { 174 + let schema: GraphSchema = self.deserialize(&bytes)?; 175 + Ok(Some(schema)) 176 + } else { 177 + Ok(None) 178 + } 179 + } 180 + 181 + async fn get_all_node_ids(&self) -> Result<Vec<NodeId>> { 182 + let cf = self.get_cf("nodes")?; 183 + let db = Arc::clone(&self.db); 184 + 185 + let result = task::spawn_blocking(move || { 186 + let mut node_ids = Vec::new(); 187 + let iter = db.iterator_cf(cf, rocksdb::IteratorMode::Start); 188 + 189 + for item in iter { 190 + match item { 191 + Ok((key, _)) => { 192 + if let Ok(storage_key) = StorageKey::from_bytes(&key) { 193 + if let StorageKey::Node(node_id) = storage_key { 194 + node_ids.push(node_id); 195 + } 196 + } 197 + }, 198 + Err(e) => return Err(GigabrainError::Storage(format!("Iterator error: {}", e))), 199 + } 200 + } 201 + 202 + Ok(node_ids) 203 + }).await 204 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))??; 205 + 206 + Ok(result) 207 + } 208 + 209 + async fn get_all_relationship_ids(&self) -> Result<Vec<RelationshipId>> { 210 + let cf = self.get_cf("relationships")?; 211 + let db = Arc::clone(&self.db); 212 + 213 + let result = task::spawn_blocking(move || { 214 + let mut rel_ids = Vec::new(); 215 + let iter = db.iterator_cf(cf, rocksdb::IteratorMode::Start); 216 + 217 + for item in iter { 218 + match item { 219 + Ok((key, _)) => { 220 + if let Ok(storage_key) = StorageKey::from_bytes(&key) { 221 + if let StorageKey::Relationship(rel_id) = storage_key { 222 
+ rel_ids.push(rel_id); 223 + } 224 + } 225 + }, 226 + Err(e) => return Err(GigabrainError::Storage(format!("Iterator error: {}", e))), 227 + } 228 + } 229 + 230 + Ok(rel_ids) 231 + }).await 232 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))??; 233 + 234 + Ok(result) 235 + } 236 + 237 + async fn delete_node(&self, node_id: NodeId) -> Result<()> { 238 + let cf = self.get_cf("nodes")?; 239 + let key = StorageKey::Node(node_id).to_bytes(); 240 + 241 + let db = Arc::clone(&self.db); 242 + task::spawn_blocking(move || { 243 + db.delete_cf(cf, key) 244 + .map_err(|e| GigabrainError::Storage(format!("Failed to delete node: {}", e))) 245 + }).await 246 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))??; 247 + 248 + Ok(()) 249 + } 250 + 251 + async fn delete_relationship(&self, rel_id: RelationshipId) -> Result<()> { 252 + let cf = self.get_cf("relationships")?; 253 + let key = StorageKey::Relationship(rel_id).to_bytes(); 254 + 255 + let db = Arc::clone(&self.db); 256 + task::spawn_blocking(move || { 257 + db.delete_cf(cf, key) 258 + .map_err(|e| GigabrainError::Storage(format!("Failed to delete relationship: {}", e))) 259 + }).await 260 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))??; 261 + 262 + Ok(()) 263 + } 264 + 265 + async fn store_metadata(&self, key: &str, value: &[u8]) -> Result<()> { 266 + let cf = self.get_cf("metadata")?; 267 + let storage_key = StorageKey::Custom(key.to_string()).to_bytes(); 268 + 269 + let db = Arc::clone(&self.db); 270 + let value = value.to_vec(); 271 + task::spawn_blocking(move || { 272 + db.put_cf(cf, storage_key, value) 273 + .map_err(|e| GigabrainError::Storage(format!("Failed to store metadata: {}", e))) 274 + }).await 275 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))??; 276 + 277 + Ok(()) 278 + } 279 + 280 + async fn get_metadata(&self, key: &str) -> Result<Option<Vec<u8>>> { 281 + let cf = self.get_cf("metadata")?; 282 + 
let storage_key = StorageKey::Custom(key.to_string()).to_bytes(); 283 + 284 + let db = Arc::clone(&self.db); 285 + let result = task::spawn_blocking(move || { 286 + db.get_cf(cf, storage_key) 287 + .map_err(|e| GigabrainError::Storage(format!("Failed to get metadata: {}", e))) 288 + }).await 289 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))??; 290 + 291 + Ok(result) 292 + } 293 + 294 + async fn flush(&self) -> Result<()> { 295 + let db = Arc::clone(&self.db); 296 + task::spawn_blocking(move || { 297 + db.flush() 298 + .map_err(|e| GigabrainError::Storage(format!("Failed to flush: {}", e))) 299 + }).await 300 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))??; 301 + 302 + Ok(()) 303 + } 304 + 305 + async fn close(&self) -> Result<()> { 306 + // RocksDB will be closed when the DB instance is dropped 307 + // We can implement graceful shutdown here if needed 308 + tracing::info!("Closing RocksDB storage"); 309 + Ok(()) 310 + } 311 + } 312 + 313 + #[cfg(not(feature = "rocksdb-storage"))] 314 + pub struct RocksDBStore; 315 + 316 + #[cfg(not(feature = "rocksdb-storage"))] 317 + impl RocksDBStore { 318 + pub fn new<P: AsRef<std::path::Path>>(_db_path: P) -> crate::Result<Self> { 319 + Err(crate::GigabrainError::Storage("RocksDB support not enabled. 
Enable 'rocksdb-storage' feature.".to_string())) 320 + } 321 + } 322 + 323 + #[cfg(test)] 324 + #[cfg(feature = "rocksdb-storage")] 325 + mod tests { 326 + use super::*; 327 + use tempfile::tempdir; 328 + use crate::core::PropertyValue; 329 + use std::collections::HashMap; 330 + 331 + #[tokio::test] 332 + async fn test_rocksdb_node_operations() -> Result<()> { 333 + let temp_dir = tempdir().unwrap(); 334 + let store = RocksDBStore::new(temp_dir.path())?; 335 + store.initialize().await?; 336 + 337 + // Create a test node 338 + let node_id = NodeId(42); 339 + let mut properties = HashMap::new(); 340 + properties.insert(crate::PropertyKeyId(0), PropertyValue::String("Alice".to_string())); 341 + properties.insert(crate::PropertyKeyId(1), PropertyValue::Integer(30)); 342 + 343 + let node = Node { 344 + id: node_id, 345 + labels: vec![crate::LabelId(0)], 346 + properties, 347 + }; 348 + 349 + // Store the node 350 + store.store_node(node_id, &node).await?; 351 + 352 + // Retrieve the node 353 + let retrieved = store.get_node(node_id).await?; 354 + assert!(retrieved.is_some()); 355 + let retrieved_node = retrieved.unwrap(); 356 + assert_eq!(retrieved_node.id, node.id); 357 + assert_eq!(retrieved_node.labels, node.labels); 358 + assert_eq!(retrieved_node.properties.len(), node.properties.len()); 359 + 360 + // Delete the node 361 + store.delete_node(node_id).await?; 362 + let deleted = store.get_node(node_id).await?; 363 + assert!(deleted.is_none()); 364 + 365 + Ok(()) 366 + } 367 + 368 + #[tokio::test] 369 + async fn test_rocksdb_relationship_operations() -> Result<()> { 370 + let temp_dir = tempdir().unwrap(); 371 + let store = RocksDBStore::new(temp_dir.path())?; 372 + store.initialize().await?; 373 + 374 + // Create a test relationship 375 + let rel_id = RelationshipId(123); 376 + let mut properties = HashMap::new(); 377 + properties.insert(crate::PropertyKeyId(0), PropertyValue::String("since".to_string())); 378 + 379 + let relationship = Relationship { 380 + id: 
rel_id, 381 + start_node: NodeId(1), 382 + end_node: NodeId(2), 383 + rel_type: 0, 384 + properties, 385 + }; 386 + 387 + // Store the relationship 388 + store.store_relationship(rel_id, &relationship).await?; 389 + 390 + // Retrieve the relationship 391 + let retrieved = store.get_relationship(rel_id).await?; 392 + assert!(retrieved.is_some()); 393 + let retrieved_rel = retrieved.unwrap(); 394 + assert_eq!(retrieved_rel.id, relationship.id); 395 + assert_eq!(retrieved_rel.start_node, relationship.start_node); 396 + assert_eq!(retrieved_rel.end_node, relationship.end_node); 397 + 398 + // Delete the relationship 399 + store.delete_relationship(rel_id).await?; 400 + let deleted = store.get_relationship(rel_id).await?; 401 + assert!(deleted.is_none()); 402 + 403 + Ok(()) 404 + } 405 + 406 + #[tokio::test] 407 + async fn test_rocksdb_metadata_operations() -> Result<()> { 408 + let temp_dir = tempdir().unwrap(); 409 + let store = RocksDBStore::new(temp_dir.path())?; 410 + store.initialize().await?; 411 + 412 + // Store metadata 413 + let key = "test_counter"; 414 + let value = 12345u64.to_be_bytes(); 415 + store.store_metadata(key, &value).await?; 416 + 417 + // Retrieve metadata 418 + let retrieved = store.get_metadata(key).await?; 419 + assert!(retrieved.is_some()); 420 + let retrieved_bytes = retrieved.unwrap(); 421 + assert_eq!(retrieved_bytes, value); 422 + 423 + Ok(()) 424 + } 425 + }
+40
src/storage/memory_store.rs
··· 141 141 async fn compact(&self) -> Result<()> { 142 142 Ok(()) 143 143 } 144 + 145 + async fn put_index_config(&self, _name: &str, _config_data: &[u8]) -> Result<()> { 146 + // No-op for memory store - indexes are not persisted 147 + Ok(()) 148 + } 149 + 150 + async fn get_index_config(&self, _name: &str) -> Result<Option<Vec<u8>>> { 151 + // No-op for memory store - indexes are not persisted 152 + Ok(None) 153 + } 154 + 155 + async fn delete_index_config(&self, _name: &str) -> Result<()> { 156 + // No-op for memory store - indexes are not persisted 157 + Ok(()) 158 + } 159 + 160 + async fn list_index_configs(&self) -> Result<Vec<String>> { 161 + // No-op for memory store - indexes are not persisted 162 + Ok(Vec::new()) 163 + } 164 + 165 + async fn put_index_data(&self, _index_key: &str, _data: &[u8]) -> Result<()> { 166 + // No-op for memory store - indexes are not persisted 167 + Ok(()) 168 + } 169 + 170 + async fn get_index_data(&self, _index_key: &str) -> Result<Option<Vec<u8>>> { 171 + // No-op for memory store - indexes are not persisted 172 + Ok(None) 173 + } 174 + 175 + async fn delete_index_data(&self, _index_key: &str) -> Result<()> { 176 + // No-op for memory store - indexes are not persisted 177 + Ok(()) 178 + } 179 + 180 + async fn list_index_data_keys(&self, _prefix: &str) -> Result<Vec<String>> { 181 + // No-op for memory store - indexes are not persisted 182 + Ok(Vec::new()) 183 + } 144 184 }
+23
src/storage/mod.rs
··· 27 27 async fn put_node_relationships(&self, node_id: NodeId, relationships: &[RelationshipId]) -> Result<()>; 28 28 async fn get_node_relationships(&self, node_id: NodeId) -> Result<Vec<RelationshipId>>; 29 29 30 + // Index persistence methods 31 + async fn put_index_config(&self, name: &str, config_data: &[u8]) -> Result<()>; 32 + async fn get_index_config(&self, name: &str) -> Result<Option<Vec<u8>>>; 33 + async fn delete_index_config(&self, name: &str) -> Result<()>; 34 + async fn list_index_configs(&self) -> Result<Vec<String>>; 35 + 36 + async fn put_index_data(&self, index_key: &str, data: &[u8]) -> Result<()>; 37 + async fn get_index_data(&self, index_key: &str) -> Result<Option<Vec<u8>>>; 38 + async fn delete_index_data(&self, index_key: &str) -> Result<()>; 39 + async fn list_index_data_keys(&self, prefix: &str) -> Result<Vec<String>>; 40 + 30 41 async fn put_raw(&self, key: &[u8], value: &[u8]) -> Result<()>; 31 42 async fn get_raw(&self, key: &[u8]) -> Result<Option<Vec<u8>>>; 32 43 async fn delete_raw(&self, key: &[u8]) -> Result<()>; ··· 66 77 pub fn encode_node_relationships_key(node_id: NodeId) -> Vec<u8> { 67 78 let mut key = vec![KeyPrefix::NodeRelationships.as_byte()]; 68 79 key.extend_from_slice(&node_id.0.to_be_bytes()); 80 + key 81 + } 82 + 83 + pub fn encode_index_config_key(name: &str) -> Vec<u8> { 84 + let mut key = vec![KeyPrefix::Index.as_byte(), 0]; // 0 for config 85 + key.extend_from_slice(name.as_bytes()); 86 + key 87 + } 88 + 89 + pub fn encode_index_data_key(index_key: &str) -> Vec<u8> { 90 + let mut key = vec![KeyPrefix::Index.as_byte(), 1]; // 1 for data 91 + key.extend_from_slice(index_key.as_bytes()); 69 92 key 70 93 }
+67
src/storage/persistent_store.rs
··· 211 211 // Create checkpoint and clean up WAL 212 212 self.checkpoint().await 213 213 } 214 + 215 + async fn put_index_config(&self, name: &str, config_data: &[u8]) -> Result<(), GigabrainError> { 216 + let key = format!("index_config:{}", name).into_bytes(); 217 + self.write_with_wal(key, config_data.to_vec()).await 218 + } 219 + 220 + async fn get_index_config(&self, name: &str) -> Result<Option<Vec<u8>>, GigabrainError> { 221 + let key = format!("index_config:{}", name).into_bytes(); 222 + match self.data.get(&key) { 223 + Some(value) => Ok(Some(value.clone())), 224 + None => Ok(None), 225 + } 226 + } 227 + 228 + async fn delete_index_config(&self, name: &str) -> Result<(), GigabrainError> { 229 + let key = format!("index_config:{}", name).into_bytes(); 230 + self.data.remove(&key); 231 + Ok(()) 232 + } 233 + 234 + async fn list_index_configs(&self) -> Result<Vec<String>, GigabrainError> { 235 + let prefix = b"index_config:"; 236 + let mut configs = Vec::new(); 237 + 238 + for entry in self.data.iter() { 239 + if entry.key().starts_with(prefix) { 240 + let name = String::from_utf8_lossy(&entry.key()[prefix.len()..]).to_string(); 241 + configs.push(name); 242 + } 243 + } 244 + 245 + Ok(configs) 246 + } 247 + 248 + async fn put_index_data(&self, index_key: &str, data: &[u8]) -> Result<(), GigabrainError> { 249 + let key = format!("index_data:{}", index_key).into_bytes(); 250 + self.write_with_wal(key, data.to_vec()).await 251 + } 252 + 253 + async fn get_index_data(&self, index_key: &str) -> Result<Option<Vec<u8>>, GigabrainError> { 254 + let key = format!("index_data:{}", index_key).into_bytes(); 255 + match self.data.get(&key) { 256 + Some(value) => Ok(Some(value.clone())), 257 + None => Ok(None), 258 + } 259 + } 260 + 261 + async fn delete_index_data(&self, index_key: &str) -> Result<(), GigabrainError> { 262 + let key = format!("index_data:{}", index_key).into_bytes(); 263 + self.data.remove(&key); 264 + Ok(()) 265 + } 266 + 267 + async fn 
list_index_data_keys(&self, prefix: &str) -> Result<Vec<String>, GigabrainError> { 268 + let search_prefix = format!("index_data:{}", prefix).into_bytes(); 269 + let mut keys = Vec::new(); 270 + 271 + for entry in self.data.iter() { 272 + if entry.key().starts_with(&search_prefix) { 273 + let full_prefix = b"index_data:"; 274 + let key_str = String::from_utf8_lossy(&entry.key()[full_prefix.len()..]).to_string(); 275 + keys.push(key_str); 276 + } 277 + } 278 + 279 + Ok(keys) 280 + } 214 281 } 215 282 216 283 #[cfg(test)]
+174 -2
src/storage/rocksdb_store.rs
··· 2 2 use crate::{ 3 3 core::{graph::Node, relationship::Relationship}, 4 4 error::GigabrainError, 5 - storage::{encode_node_key, encode_relationship_key, encode_node_relationships_key}, 5 + storage::{encode_node_key, encode_relationship_key, encode_node_relationships_key, encode_index_config_key, encode_index_data_key, KeyPrefix}, 6 6 NodeId, RelationshipId, 7 7 }; 8 8 #[cfg(feature = "rocksdb-storage")] ··· 50 50 ColumnFamilyDescriptor::new("nodes", Options::default()), 51 51 ColumnFamilyDescriptor::new("relationships", Options::default()), 52 52 ColumnFamilyDescriptor::new("node_relationships", Options::default()), 53 + ColumnFamilyDescriptor::new("indexes", Options::default()), 53 54 ColumnFamilyDescriptor::new("metadata", Options::default()), 54 55 ]; 55 56 ··· 297 298 .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))? 298 299 } 299 300 301 + async fn put_index_config(&self, name: &str, config_data: &[u8]) -> Result<(), GigabrainError> { 302 + let db = self.db.clone(); 303 + let key = encode_index_config_key(name); 304 + let config_data = config_data.to_vec(); 305 + 306 + task::spawn_blocking(move || { 307 + let cf = db.cf_handle("indexes") 308 + .ok_or_else(|| GigabrainError::Storage("Column family 'indexes' not found".to_string()))?; 309 + 310 + let mut write_opts = WriteOptions::default(); 311 + write_opts.set_sync(false); 312 + 313 + db.put_cf_opt(&cf, &key, &config_data, &write_opts) 314 + .map_err(|e| GigabrainError::Storage(format!("Failed to put index config: {}", e))) 315 + }).await 316 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))? 
317 + } 318 + 319 + async fn get_index_config(&self, name: &str) -> Result<Option<Vec<u8>>, GigabrainError> { 320 + let db = self.db.clone(); 321 + let key = encode_index_config_key(name); 322 + 323 + task::spawn_blocking(move || { 324 + let cf = db.cf_handle("indexes") 325 + .ok_or_else(|| GigabrainError::Storage("Column family 'indexes' not found".to_string()))?; 326 + 327 + let read_opts = ReadOptions::default(); 328 + 329 + match db.get_cf_opt(&cf, &key, &read_opts) { 330 + Ok(data) => Ok(data), 331 + Err(e) => Err(GigabrainError::Storage(format!("Failed to get index config: {}", e))), 332 + } 333 + }).await 334 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))? 335 + } 336 + 337 + async fn delete_index_config(&self, name: &str) -> Result<(), GigabrainError> { 338 + let db = self.db.clone(); 339 + let key = encode_index_config_key(name); 340 + 341 + task::spawn_blocking(move || { 342 + let cf = db.cf_handle("indexes") 343 + .ok_or_else(|| GigabrainError::Storage("Column family 'indexes' not found".to_string()))?; 344 + 345 + let mut write_opts = WriteOptions::default(); 346 + write_opts.set_sync(false); 347 + 348 + db.delete_cf_opt(&cf, &key, &write_opts) 349 + .map_err(|e| GigabrainError::Storage(format!("Failed to delete index config: {}", e))) 350 + }).await 351 + .map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))? 
352 + }

    /// Lists the names of every index configuration stored in the `indexes`
    /// column family.
    ///
    /// Configuration keys have the layout `[KeyPrefix::Index, 0, <name bytes>]`.
    /// The scan starts at that two-byte prefix and stops at the first key that
    /// no longer carries it. Name bytes are decoded lossily as UTF-8.
    ///
    /// # Errors
    ///
    /// Returns `GigabrainError::Storage` when the `indexes` column family is
    /// missing, when the iterator reports an error, or when the blocking task
    /// cannot be joined.
    async fn list_index_configs(&self) -> Result<Vec<String>, GigabrainError> {
        let db = self.db.clone();

        let joined = task::spawn_blocking(move || -> Result<Vec<String>, GigabrainError> {
            let cf = db.cf_handle("indexes")
                .ok_or_else(|| GigabrainError::Storage("Column family 'indexes' not found".to_string()))?;

            // Two-byte prefix shared by all index configuration keys.
            let config_prefix = vec![KeyPrefix::Index.as_byte(), 0];
            let mut names = Vec::new();

            let entries = db.iterator_cf(
                &cf,
                rocksdb::IteratorMode::From(&config_prefix, rocksdb::Direction::Forward),
            );
            for entry in entries {
                let (key, _) = entry
                    .map_err(|e| GigabrainError::Storage(format!("Iterator error: {}", e)))?;

                // Keys are ordered, so the first key without the prefix ends the scan.
                if !key.starts_with(&config_prefix) {
                    break;
                }
                names.push(String::from_utf8_lossy(&key[2..]).to_string());
            }

            Ok(names)
        }).await;

        joined.map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))?
    }

    /// Stores `data` in the `indexes` column family under the key produced by
    /// `encode_index_data_key(index_key)`.
    ///
    /// The write is issued with `sync` disabled on its `WriteOptions`.
    ///
    /// # Errors
    ///
    /// Returns `GigabrainError::Storage` when the column family is missing,
    /// when the put fails, or when the blocking task cannot be joined.
    async fn put_index_data(&self, index_key: &str, data: &[u8]) -> Result<(), GigabrainError> {
        let storage_key = encode_index_data_key(index_key);
        let payload = data.to_vec();
        let db = self.db.clone();

        let joined = task::spawn_blocking(move || {
            let cf = db.cf_handle("indexes")
                .ok_or_else(|| GigabrainError::Storage("Column family 'indexes' not found".to_string()))?;

            let mut opts = WriteOptions::default();
            opts.set_sync(false);

            db.put_cf_opt(&cf, &storage_key, &payload, &opts)
                .map_err(|e| GigabrainError::Storage(format!("Failed to put index data: {}", e)))
        }).await;

        joined.map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))?
    }

    /// Reads the bytes stored for `index_key` from the `indexes` column family.
    ///
    /// Returns `Ok(None)` when no value exists under the encoded key.
    ///
    /// # Errors
    ///
    /// Returns `GigabrainError::Storage` when the column family is missing,
    /// when the read fails, or when the blocking task cannot be joined.
    async fn get_index_data(&self, index_key: &str) -> Result<Option<Vec<u8>>, GigabrainError> {
        let storage_key = encode_index_data_key(index_key);
        let db = self.db.clone();

        let joined = task::spawn_blocking(move || {
            let cf = db.cf_handle("indexes")
                .ok_or_else(|| GigabrainError::Storage("Column family 'indexes' not found".to_string()))?;

            db.get_cf_opt(&cf, &storage_key, &ReadOptions::default())
                .map_err(|e| GigabrainError::Storage(format!("Failed to get index data: {}", e)))
        }).await;

        joined.map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))?
    }

    /// Deletes the value stored for `index_key` from the `indexes` column family.
    ///
    /// The delete is issued with `sync` disabled on its `WriteOptions`.
    ///
    /// # Errors
    ///
    /// Returns `GigabrainError::Storage` when the column family is missing,
    /// when the delete fails, or when the blocking task cannot be joined.
    async fn delete_index_data(&self, index_key: &str) -> Result<(), GigabrainError> {
        let storage_key = encode_index_data_key(index_key);
        let db = self.db.clone();

        let joined = task::spawn_blocking(move || {
            let cf = db.cf_handle("indexes")
                .ok_or_else(|| GigabrainError::Storage("Column family 'indexes' not found".to_string()))?;

            let mut opts = WriteOptions::default();
            opts.set_sync(false);

            db.delete_cf_opt(&cf, &storage_key, &opts)
                .map_err(|e| GigabrainError::Storage(format!("Failed to delete index data: {}", e)))
        }).await;

        joined.map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))?
    }

    /// Lists the index data keys whose decoded name starts with `prefix`.
    ///
    /// The scan begins at `encode_index_data_key(prefix)` and moves forward.
    /// It stops at the first key that does not start with the bytes
    /// `[KeyPrefix::Index, 1]`, or whose lossily decoded remainder does not
    /// start with `prefix`. Returned strings exclude the two leading bytes.
    ///
    /// # Errors
    ///
    /// Returns `GigabrainError::Storage` when the column family is missing,
    /// when the iterator reports an error, or when the blocking task cannot
    /// be joined.
    async fn list_index_data_keys(&self, prefix: &str) -> Result<Vec<String>, GigabrainError> {
        let db = self.db.clone();
        let prefix = prefix.to_string();

        let joined = task::spawn_blocking(move || -> Result<Vec<String>, GigabrainError> {
            let cf = db.cf_handle("indexes")
                .ok_or_else(|| GigabrainError::Storage("Column family 'indexes' not found".to_string()))?;

            let seek_key = encode_index_data_key(&prefix);
            let mut matches = Vec::new();

            let entries = db.iterator_cf(
                &cf,
                rocksdb::IteratorMode::From(&seek_key, rocksdb::Direction::Forward),
            );
            for entry in entries {
                let (key, _) = entry
                    .map_err(|e| GigabrainError::Storage(format!("Iterator error: {}", e)))?;

                // Leaving the index-data key space ends the scan.
                let is_index_data =
                    key.len() >= 2 && key[0] == KeyPrefix::Index.as_byte() && key[1] == 1;
                if !is_index_data {
                    break;
                }

                let name = String::from_utf8_lossy(&key[2..]).to_string();
                if !name.starts_with(&prefix) {
                    break; // No more keys with this prefix
                }
                matches.push(name);
            }

            Ok(matches)
        }).await;

        joined.map_err(|e| GigabrainError::Storage(format!("Task join error: {}", e)))?
    }

300 472 async fn flush(&self) -> Result<(), GigabrainError> { 301 473 let db = self.db.clone(); 302 474 ··· 312 484 313 485 task::spawn_blocking(move || { 314 486 // Compact all column families 315 - for cf_name in ["nodes", "relationships", "node_relationships", "metadata"] { 487 + for cf_name in ["nodes", "relationships", "node_relationships", "indexes", "metadata"] { 316 488 if let Some(cf) = db.cf_handle(cf_name) { 317 489 db.compact_range_cf(&cf, None::<&[u8]>, None::<&[u8]>); 318 490 }
+444
tests/index_persistence_tests.rs
··· 1 + use std::sync::Arc; 2 + use gigabrain::{Graph, IndexType}; 3 + use gigabrain::core::PropertyValue; 4 + use gigabrain::storage::MemoryStore; 5 + #[cfg(feature = "rocksdb-storage")] 6 + use gigabrain::storage::RocksDBStore; 7 + #[cfg(feature = "rocksdb-storage")] 8 + use tempfile::TempDir; 9 + 10 + type TestResult = Result<(), Box<dyn std::error::Error>>; 11 + 12 + /// Test basic persistent index manager functionality with memory store 13 + #[tokio::test] 14 + async fn test_persistent_index_manager_memory_store() -> TestResult { 15 + let storage = Arc::new(MemoryStore::new()); 16 + let graph = Graph::with_persistent_indexes(storage); 17 + 18 + // Create some test data 19 + let alice_id = graph.create_node(); 20 + let bob_id = graph.create_node(); 21 + 22 + let schema = graph.schema(); 23 + let person_label_id = { 24 + let mut schema_guard = schema.write(); 25 + schema_guard.get_or_create_label("Person") 26 + }; 27 + let name_prop_id = { 28 + let mut schema_guard = schema.write(); 29 + schema_guard.get_or_create_property_key("name") 30 + }; 31 + let age_prop_id = { 32 + let mut schema_guard = schema.write(); 33 + schema_guard.get_or_create_property_key("age") 34 + }; 35 + 36 + // Update nodes with labels and properties 37 + graph.update_node(alice_id, |node| { 38 + node.add_label(person_label_id); 39 + node.properties.insert(name_prop_id, PropertyValue::String("Alice".to_string())); 40 + node.properties.insert(age_prop_id, PropertyValue::Integer(30)); 41 + })?; 42 + 43 + graph.update_node(bob_id, |node| { 44 + node.add_label(person_label_id); 45 + node.properties.insert(name_prop_id, PropertyValue::String("Bob".to_string())); 46 + node.properties.insert(age_prop_id, PropertyValue::Integer(25)); 47 + })?; 48 + 49 + // Load indexes to ensure they are initialized 50 + graph.load_indexes().await?; 51 + 52 + // Test that indexes work correctly 53 + let person_nodes = graph.find_nodes_by_label("Person")?; 54 + assert_eq!(person_nodes.len(), 2); 55 + 
assert!(person_nodes.contains(&alice_id)); 56 + assert!(person_nodes.contains(&bob_id)); 57 + 58 + let alice_by_name = graph.find_nodes_by_property("name", &PropertyValue::String("Alice".to_string()))?; 59 + assert_eq!(alice_by_name.len(), 1); 60 + assert!(alice_by_name.contains(&alice_id)); 61 + 62 + // Test flushing indexes 63 + graph.flush_indexes().await?; 64 + 65 + // Verify persistent index manager is available 66 + assert!(graph.persistent_index_manager().is_some()); 67 + 68 + println!("✅ Persistent index manager with memory store tests passed!"); 69 + Ok(()) 70 + } 71 + 72 + /// Test persistent index manager with RocksDB store 73 + #[cfg(feature = "rocksdb-storage")] 74 + #[tokio::test] 75 + async fn test_persistent_index_manager_rocksdb_store() -> TestResult { 76 + let temp_dir = TempDir::new()?; 77 + let storage = Arc::new(RocksDBStore::new(temp_dir.path()).await?); 78 + let graph = Graph::with_persistent_indexes(storage); 79 + 80 + // Create some test data 81 + let alice_id = graph.create_node(); 82 + let bob_id = graph.create_node(); 83 + 84 + let schema = graph.schema(); 85 + let person_label_id = { 86 + let mut schema_guard = schema.write(); 87 + schema_guard.get_or_create_label("Person") 88 + }; 89 + let name_prop_id = { 90 + let mut schema_guard = schema.write(); 91 + schema_guard.get_or_create_property_key("name") 92 + }; 93 + let age_prop_id = { 94 + let mut schema_guard = schema.write(); 95 + schema_guard.get_or_create_property_key("age") 96 + }; 97 + 98 + // Update nodes with labels and properties 99 + graph.update_node(alice_id, |node| { 100 + node.add_label(person_label_id); 101 + node.properties.insert(name_prop_id, PropertyValue::String("Alice".to_string())); 102 + node.properties.insert(age_prop_id, PropertyValue::Integer(30)); 103 + })?; 104 + 105 + graph.update_node(bob_id, |node| { 106 + node.add_label(person_label_id); 107 + node.properties.insert(name_prop_id, PropertyValue::String("Bob".to_string())); 108 + 
node.properties.insert(age_prop_id, PropertyValue::Integer(25)); 109 + })?; 110 + 111 + // Load indexes to ensure they are initialized 112 + graph.load_indexes().await?; 113 + 114 + // Test that indexes work correctly 115 + let person_nodes = graph.find_nodes_by_label("Person")?; 116 + assert_eq!(person_nodes.len(), 2); 117 + assert!(person_nodes.contains(&alice_id)); 118 + assert!(person_nodes.contains(&bob_id)); 119 + 120 + let alice_by_name = graph.find_nodes_by_property("name", &PropertyValue::String("Alice".to_string()))?; 121 + assert_eq!(alice_by_name.len(), 1); 122 + assert!(alice_by_name.contains(&alice_id)); 123 + 124 + // Test flushing indexes to persistent storage 125 + graph.flush_indexes().await?; 126 + 127 + // Verify persistent index manager is available 128 + assert!(graph.persistent_index_manager().is_some()); 129 + 130 + println!("✅ Persistent index manager with RocksDB store tests passed!"); 131 + Ok(()) 132 + } 133 + 134 + /// Test index persistence across restarts 135 + #[cfg(feature = "rocksdb-storage")] 136 + #[tokio::test] 137 + async fn test_index_persistence_across_restarts() -> TestResult { 138 + let temp_dir = TempDir::new()?; 139 + let db_path = temp_dir.path().to_path_buf(); 140 + 141 + // Phase 1: Create graph with data and indexes 142 + { 143 + let storage = Arc::new(RocksDBStore::new(&db_path).await?); 144 + let graph = Graph::with_persistent_indexes(storage); 145 + 146 + // Create test data 147 + let alice_id = graph.create_node(); 148 + let bob_id = graph.create_node(); 149 + 150 + let schema = graph.schema(); 151 + let person_label_id = { 152 + let mut schema_guard = schema.write(); 153 + schema_guard.get_or_create_label("Person") 154 + }; 155 + let name_prop_id = { 156 + let mut schema_guard = schema.write(); 157 + schema_guard.get_or_create_property_key("name") 158 + }; 159 + 160 + // Update nodes 161 + graph.update_node(alice_id, |node| { 162 + node.add_label(person_label_id); 163 + node.properties.insert(name_prop_id, 
PropertyValue::String("Alice".to_string())); 164 + })?; 165 + 166 + graph.update_node(bob_id, |node| { 167 + node.add_label(person_label_id); 168 + node.properties.insert(name_prop_id, PropertyValue::String("Bob".to_string())); 169 + })?; 170 + 171 + // Explicitly create indexes 172 + if let Some(persistent_manager) = graph.persistent_index_manager() { 173 + persistent_manager.create_index( 174 + IndexType::Label(person_label_id), 175 + Some("person_label_index".to_string()), 176 + false 177 + ).await?; 178 + 179 + persistent_manager.create_index( 180 + IndexType::Property(name_prop_id), 181 + Some("name_property_index".to_string()), 182 + false 183 + ).await?; 184 + } 185 + 186 + // Verify indexes work 187 + let person_nodes = graph.find_nodes_by_label("Person")?; 188 + assert_eq!(person_nodes.len(), 2); 189 + 190 + // Flush to storage 191 + graph.flush_indexes().await?; 192 + } 193 + 194 + // Phase 2: Create new graph instance and verify indexes are loaded 195 + { 196 + let storage = Arc::new(RocksDBStore::new(&db_path).await?); 197 + let graph = Graph::with_persistent_indexes(storage); 198 + 199 + // Load indexes from storage 200 + graph.load_indexes().await?; 201 + 202 + // Verify indexes are available 203 + if let Some(persistent_manager) = graph.persistent_index_manager() { 204 + let indexes = persistent_manager.list_indexes().await?; 205 + 206 + // We should have the indexes we created 207 + let index_names: Vec<_> = indexes.iter() 208 + .filter_map(|config| config.name.as_ref()) 209 + .collect(); 210 + 211 + // At minimum, we should have some indexes 212 + assert!(!index_names.is_empty()); 213 + 214 + println!("Found {} indexes after restart", index_names.len()); 215 + for name in &index_names { 216 + println!(" - {}", name); 217 + } 218 + } 219 + } 220 + 221 + println!("✅ Index persistence across restarts tests passed!"); 222 + Ok(()) 223 + } 224 + 225 + /// Test composite index persistence 226 + #[tokio::test] 227 + async fn 
test_composite_index_persistence() -> TestResult { 228 + let storage = Arc::new(MemoryStore::new()); 229 + let graph = Graph::with_persistent_indexes(storage); 230 + 231 + // Create composite index 232 + let schema = graph.schema(); 233 + let name_prop_id = { 234 + let mut schema_guard = schema.write(); 235 + schema_guard.get_or_create_property_key("name") 236 + }; 237 + let age_prop_id = { 238 + let mut schema_guard = schema.write(); 239 + schema_guard.get_or_create_property_key("age") 240 + }; 241 + 242 + if let Some(persistent_manager) = graph.persistent_index_manager() { 243 + let composite_index_type = IndexType::Composite(vec![name_prop_id, age_prop_id]); 244 + let index_name = persistent_manager.create_index( 245 + composite_index_type, 246 + Some("name_age_composite".to_string()), 247 + false 248 + ).await?; 249 + assert_eq!(index_name, "name_age_composite"); 250 + 251 + // Create test data 252 + let alice_id = graph.create_node(); 253 + graph.update_node(alice_id, |node| { 254 + node.properties.insert(name_prop_id, PropertyValue::String("Alice".to_string())); 255 + node.properties.insert(age_prop_id, PropertyValue::Integer(30)); 256 + })?; 257 + 258 + // Test composite query 259 + let alice_nodes = persistent_manager.get_nodes_by_composite_key( 260 + &[name_prop_id, age_prop_id], 261 + &[PropertyValue::String("Alice".to_string()), PropertyValue::Integer(30)] 262 + ).await?; 263 + assert_eq!(alice_nodes.len(), 1); 264 + assert!(alice_nodes.contains(&alice_id)); 265 + 266 + // Test persistence 267 + persistent_manager.flush().await?; 268 + 269 + // Verify index still exists 270 + let indexes = persistent_manager.list_indexes().await?; 271 + let composite_exists = indexes.iter().any(|config| 272 + config.name.as_ref() == Some(&"name_age_composite".to_string()) 273 + ); 274 + assert!(composite_exists); 275 + } 276 + 277 + println!("✅ Composite index persistence tests passed!"); 278 + Ok(()) 279 + } 280 + 281 + /// Test index statistics persistence 282 + 
#[tokio::test] 283 + async fn test_index_statistics_persistence() -> TestResult { 284 + let storage = Arc::new(MemoryStore::new()); 285 + let graph = Graph::with_persistent_indexes(storage); 286 + 287 + // Create some test data and indexes 288 + let alice_id = graph.create_node(); 289 + let bob_id = graph.create_node(); 290 + let charlie_id = graph.create_node(); 291 + 292 + let schema = graph.schema(); 293 + let person_label_id = { 294 + let mut schema_guard = schema.write(); 295 + schema_guard.get_or_create_label("Person") 296 + }; 297 + let name_prop_id = { 298 + let mut schema_guard = schema.write(); 299 + schema_guard.get_or_create_property_key("name") 300 + }; 301 + 302 + // Update nodes 303 + for (node_id, name) in [(alice_id, "Alice"), (bob_id, "Bob"), (charlie_id, "Charlie")] { 304 + graph.update_node(node_id, |node| { 305 + node.add_label(person_label_id); 306 + node.properties.insert(name_prop_id, PropertyValue::String(name.to_string())); 307 + })?; 308 + } 309 + 310 + if let Some(persistent_manager) = graph.persistent_index_manager() { 311 + // Get statistics 312 + let stats = persistent_manager.get_index_stats().await?; 313 + 314 + // Should have some statistics 315 + assert!(!stats.is_empty()); 316 + 317 + // Check global label stats 318 + if let Some(global_stats) = stats.get("global_labels") { 319 + assert!(global_stats.total_nodes > 0); 320 + println!("Global label index has {} nodes", global_stats.total_nodes); 321 + } 322 + 323 + // Save statistics 324 + persistent_manager.flush().await?; 325 + 326 + println!("Saved {} index statistics", stats.len()); 327 + } 328 + 329 + println!("✅ Index statistics persistence tests passed!"); 330 + Ok(()) 331 + } 332 + 333 + /// Test error handling in persistent index operations 334 + #[tokio::test] 335 + async fn test_persistent_index_error_handling() -> TestResult { 336 + let storage = Arc::new(MemoryStore::new()); 337 + let graph = Graph::with_persistent_indexes(storage); 338 + 339 + if let 
Some(persistent_manager) = graph.persistent_index_manager() { 340 + // Test creating duplicate index 341 + let property_key_id = gigabrain::PropertyKeyId(1); 342 + let index_type = IndexType::Property(property_key_id); 343 + 344 + let index_name1 = persistent_manager.create_index( 345 + index_type.clone(), 346 + Some("duplicate_test".to_string()), 347 + false 348 + ).await?; 349 + assert_eq!(index_name1, "duplicate_test"); 350 + 351 + // Attempting to create the same index should fail 352 + let duplicate_result = persistent_manager.create_index( 353 + index_type, 354 + Some("duplicate_test".to_string()), 355 + false 356 + ).await; 357 + assert!(duplicate_result.is_err()); 358 + 359 + // Test dropping non-existent index 360 + let drop_result = persistent_manager.drop_index("non_existent_index").await; 361 + assert!(drop_result.is_err()); 362 + 363 + // Test querying non-existent composite index 364 + let query_result = persistent_manager.get_nodes_by_composite_key( 365 + &[gigabrain::PropertyKeyId(999), gigabrain::PropertyKeyId(1000)], 366 + &[PropertyValue::String("test".to_string()), PropertyValue::Integer(1)] 367 + ).await; 368 + // Should return empty result, not error 369 + assert!(query_result.is_ok()); 370 + assert!(query_result.unwrap().is_empty()); 371 + } 372 + 373 + println!("✅ Persistent index error handling tests passed!"); 374 + Ok(()) 375 + } 376 + 377 + /// Test performance of persistent index operations 378 + #[tokio::test] 379 + async fn test_persistent_index_performance() -> TestResult { 380 + let storage = Arc::new(MemoryStore::new()); 381 + let graph = Graph::with_persistent_indexes(storage); 382 + 383 + // Create a larger dataset 384 + let mut node_ids = Vec::new(); 385 + let schema = graph.schema(); 386 + let person_label_id = { 387 + let mut schema_guard = schema.write(); 388 + schema_guard.get_or_create_label("Person") 389 + }; 390 + let name_prop_id = { 391 + let mut schema_guard = schema.write(); 392 + 
schema_guard.get_or_create_property_key("name") 393 + }; 394 + let age_prop_id = { 395 + let mut schema_guard = schema.write(); 396 + schema_guard.get_or_create_property_key("age") 397 + }; 398 + 399 + // Create 100 nodes 400 + let start = std::time::Instant::now(); 401 + for i in 0..100 { 402 + let node_id = graph.create_node(); 403 + graph.update_node(node_id, |node| { 404 + node.add_label(person_label_id); 405 + node.properties.insert(name_prop_id, PropertyValue::String(format!("Person_{:03}", i))); 406 + node.properties.insert(age_prop_id, PropertyValue::Integer(20 + (i % 50))); 407 + })?; 408 + node_ids.push(node_id); 409 + } 410 + let creation_time = start.elapsed(); 411 + println!("Created 100 nodes with indexes in {:?}", creation_time); 412 + 413 + if let Some(persistent_manager) = graph.persistent_index_manager() { 414 + // Test query performance 415 + let start = std::time::Instant::now(); 416 + let person_nodes = persistent_manager.get_nodes_by_label(person_label_id).await?; 417 + let query_time = start.elapsed(); 418 + assert_eq!(person_nodes.len(), 100); 419 + println!("Queried {} nodes by label in {:?}", person_nodes.len(), query_time); 420 + 421 + // Test property query performance 422 + let start = std::time::Instant::now(); 423 + let specific_person = persistent_manager.get_nodes_by_property( 424 + name_prop_id, 425 + &PropertyValue::String("Person_050".to_string()) 426 + ).await?; 427 + let prop_query_time = start.elapsed(); 428 + assert_eq!(specific_person.len(), 1); 429 + println!("Queried specific person by property in {:?}", prop_query_time); 430 + 431 + // Test flush performance 432 + let start = std::time::Instant::now(); 433 + persistent_manager.flush().await?; 434 + let flush_time = start.elapsed(); 435 + println!("Flushed indexes in {:?}", flush_time); 436 + 437 + // Get statistics 438 + let stats = persistent_manager.get_index_stats().await?; 439 + println!("Index statistics: {} indexes tracked", stats.len()); 440 + } 441 + 442 + 
println!("✅ Persistent index performance tests passed!"); 443 + Ok(()) 444 + }