risingwave_sqlsmith/sqlreduce/
rules.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Rules-based SQL reduction system.
16//!
17//! This module defines reduction rules and operations for different AST node types,
18//! providing configurable reduction behavior for comprehensive SQL simplification.
19
20use std::collections::HashMap;
21
22use risingwave_sqlparser::ast::*;
23
24use crate::sqlreduce::path::{AstField, AstNode, AstPath, PathComponent};
25
26/// Defines what actions can be performed on an AST node during reduction.
27#[derive(Debug, Clone, Default)]
28pub struct ReductionRule {
29    /// Whether to try replacing this node with NULL/None
30    pub try_null: bool,
31    /// Attributes to descend into for further reduction
32    pub descend: Vec<AstField>,
33    /// Attributes that can be removed (set to None)
34    pub remove: Vec<AstField>,
35    /// Attributes whose children can be pulled up to replace this node
36    pub pullup: Vec<AstField>,
37    /// Attributes whose subtrees can replace this entire node
38    pub replace: Vec<AstField>,
39}
40
41/// Repository of reduction rules for different AST node types.
42/// Configures how different SQL constructs should be reduced.
43pub struct ReductionRules {
44    rules: HashMap<String, ReductionRule>,
45}
46
47impl Default for ReductionRules {
48    fn default() -> Self {
49        let mut rules = HashMap::new();
50
51        // SelectStmt rules (most important for SQL reduction)
52        // Dependency order (independent first, dependent later):
53        // 1. Selection (WHERE) - independent, doesn't affect other clauses
54        // 2. Having - semi-independent, requires GROUP BY but rarely referenced
55        // 3. GroupBy - dependent, SELECT may reference GROUP BY columns
56        // 4. Projection (SELECT list) - dependent, may reference GROUP BY
57        // 5. From - fundamental, almost everything depends on it
58        rules.insert(
59            "Select".to_owned(),
60            ReductionRule {
61                try_null: false,
62                descend: vec![
63                    AstField::Projection,
64                    AstField::From,
65                    AstField::Selection,
66                    AstField::GroupBy,
67                    AstField::Having,
68                ],
69                remove: vec![
70                    AstField::Selection,  // Try removing WHERE first (most independent)
71                    AstField::Having,     // Then HAVING
72                    AstField::Projection, // Then SELECT list
73                    AstField::From,       // Then FROM
74                    AstField::GroupBy,    // Then GROUP BY
75                ],
76                pullup: vec![],
77                replace: vec![],
78            },
79        );
80
81        // Query rules
82        rules.insert(
83            "Query".to_owned(),
84            ReductionRule {
85                try_null: false,
86                descend: vec![AstField::Body, AstField::With, AstField::OrderBy],
87                remove: vec![AstField::With, AstField::OrderBy],
88                pullup: vec![],
89                replace: vec![AstField::Body],
90            },
91        );
92
93        // WITH clause rules
94        rules.insert(
95            "With".to_owned(),
96            ReductionRule {
97                try_null: true,
98                descend: vec![AstField::CteTable],
99                remove: vec![],
100                pullup: vec![],
101                replace: vec![],
102            },
103        );
104
105        // CTE list rules
106        rules.insert(
107            "CteList".to_owned(),
108            ReductionRule {
109                try_null: false,
110                descend: vec![], // Individual CTEs accessed via index
111                remove: vec![],
112                pullup: vec![],
113                replace: vec![],
114            },
115        );
116
117        // CTE rules
118        rules.insert(
119            "Cte".to_owned(),
120            ReductionRule {
121                try_null: false,
122                descend: vec![AstField::CteInner],
123                remove: vec![],
124                pullup: vec![AstField::CteInner],
125                replace: vec![],
126            },
127        );
128
129        // Expression rules - focus on pullup for simplification
130        rules.insert(
131            "BinaryOp".to_owned(),
132            ReductionRule {
133                try_null: true,
134                descend: vec![],
135                remove: vec![],
136                pullup: vec![AstField::Left, AstField::Right],
137                replace: vec![],
138            },
139        );
140
141        rules.insert(
142            "Case".to_owned(),
143            ReductionRule {
144                try_null: true,
145                descend: vec![],
146                remove: vec![],
147                pullup: vec![AstField::Operand, AstField::ElseResult],
148                replace: vec![],
149            },
150        );
151
152        // Function call rules
153        rules.insert(
154            "Function".to_owned(),
155            ReductionRule {
156                try_null: true,
157                descend: vec![], // args is not modeled as an AstField in our enum
158                remove: vec![],
159                pullup: vec![],
160                replace: vec![],
161            },
162        );
163
164        // Subquery rules
165        rules.insert(
166            "Subquery".to_owned(),
167            ReductionRule {
168                try_null: true,
169                descend: vec![],
170                remove: vec![],
171                pullup: vec![],
172                replace: vec![AstField::Subquery],
173            },
174        );
175
176        // Constant rules
177        rules.insert(
178            "Value".to_owned(),
179            ReductionRule {
180                try_null: true,
181                descend: vec![],
182                remove: vec![],
183                pullup: vec![],
184                replace: vec![],
185            },
186        );
187
188        // List reduction rules for SQL collections
189        rules.insert(
190            "SelectItemList".to_owned(),
191            ReductionRule {
192                try_null: false,
193                descend: vec![],
194                remove: vec![],
195                pullup: vec![],
196                replace: vec![],
197            },
198        );
199
200        rules.insert(
201            "ExprList".to_owned(),
202            ReductionRule {
203                try_null: true,
204                descend: vec![],
205                remove: vec![],
206                pullup: vec![],
207                replace: vec![],
208            },
209        );
210
211        rules.insert(
212            "TableList".to_owned(),
213            ReductionRule {
214                try_null: false,
215                descend: vec![],
216                remove: vec![],
217                pullup: vec![],
218                replace: vec![],
219            },
220        );
221
222        rules.insert(
223            "OrderByList".to_owned(),
224            ReductionRule {
225                try_null: false,
226                descend: vec![],
227                remove: vec![],
228                pullup: vec![],
229                replace: vec![],
230            },
231        );
232
233        // JOIN-related rules for better JOIN handling
234        rules.insert(
235            "TableWithJoins".to_owned(),
236            ReductionRule {
237                try_null: false,
238                descend: vec![AstField::Relation, AstField::Joins],
239                remove: vec![AstField::Joins],
240                pullup: vec![AstField::Relation],
241                replace: vec![AstField::Relation],
242            },
243        );
244
245        rules.insert(
246            "Join".to_owned(),
247            ReductionRule {
248                try_null: true,
249                descend: vec![AstField::Relation],
250                remove: vec![],
251                pullup: vec![AstField::Relation],
252                replace: vec![AstField::Relation],
253            },
254        );
255
256        rules.insert(
257            "TableFactor".to_owned(),
258            ReductionRule {
259                try_null: true, // Allow trying NULL for derived tables
260                descend: vec![AstField::Subquery],
261                remove: vec![AstField::Subquery], // Allow removing subquery entirely
262                pullup: vec![AstField::Subquery],
263                replace: vec![AstField::Subquery],
264            },
265        );
266
267        rules.insert(
268            "JoinList".to_owned(),
269            ReductionRule {
270                try_null: true, // Allow trying NULL for join lists
271                descend: vec![],
272                remove: vec![], // Remove individual joins through list operations
273                pullup: vec![],
274                replace: vec![],
275            },
276        );
277
278        Self { rules }
279    }
280}
281
282impl ReductionRules {
283    /// Get the reduction rule for a specific node type.
284    pub fn get_rule(&self, node_type: &str) -> Option<&ReductionRule> {
285        self.rules.get(node_type)
286    }
287
288    /// Get the node type string for an AST node.
289    pub fn get_node_type(node: &AstNode) -> String {
290        match node {
291            AstNode::Statement(_) => "Statement".to_owned(),
292            AstNode::Query(_) => "Query".to_owned(),
293            AstNode::Select(_) => "Select".to_owned(),
294            AstNode::Expr(expr) => match expr {
295                Expr::BinaryOp { .. } => "BinaryOp".to_owned(),
296                Expr::Case { .. } => "Case".to_owned(),
297                Expr::Function { .. } => "Function".to_owned(),
298                Expr::Subquery(_) => "Subquery".to_owned(),
299                Expr::Value(_) => "Value".to_owned(),
300                _ => "Expr".to_owned(),
301            },
302            AstNode::SelectItem(_) => "SelectItem".to_owned(),
303            AstNode::TableWithJoins(_) => "TableWithJoins".to_owned(),
304            AstNode::Join(_) => "Join".to_owned(),
305            AstNode::TableFactor(_) => "TableFactor".to_owned(),
306            AstNode::OrderByExpr(_) => "OrderByExpr".to_owned(),
307            AstNode::With(_) => "With".to_owned(),
308            AstNode::Cte(_) => "Cte".to_owned(),
309            AstNode::ExprList(_) => "ExprList".to_owned(),
310            AstNode::SelectItemList(_) => "SelectItemList".to_owned(),
311            AstNode::TableList(_) => "TableList".to_owned(),
312            AstNode::JoinList(_) => "JoinList".to_owned(),
313            AstNode::OrderByList(_) => "OrderByList".to_owned(),
314            AstNode::CteList(_) => "CteList".to_owned(),
315            AstNode::Option(_) => "Option".to_owned(),
316        }
317    }
318}
319
320/// Different types of reduction operations that can be applied.
321#[derive(Debug, Clone)]
322pub enum ReductionOperation {
323    /// Replace node with NULL/None
324    TryNull,
325    /// Remove a specific attribute (set to None)
326    Remove(AstField),
327    /// Pull up a subnode to replace this node
328    Pullup(AstField),
329    /// Replace this node with a subtree
330    Replace(AstField),
331    /// Remove an element from a list/tuple
332    RemoveListElement(usize),
333}
334
335impl ReductionOperation {
336    /// Get the priority score for this operation (higher = tried first).
337    /// Uses dependency-aware priority: independent components removed first,
338    /// then dependent components to minimize validation failures.
339    pub fn priority(&self) -> i32 {
340        match self {
341            // List element removal: Very safe, high success rate
342            // Removing a single item rarely breaks the query structure
343            ReductionOperation::RemoveListElement(_) => 100,
344
345            // Remove operations: Priority based on SQL dependencies
346            // Independent clauses get higher priority, dependent ones lower
347            ReductionOperation::Remove(field) => {
348                use AstField::*;
349                match field {
350                    // Tier 1: Fully independent clauses (no other clause depends on them)
351                    // WHERE is independent - can be removed without affecting structure
352                    Selection => 95,
353                    // ORDER BY is independent - only affects result ordering
354                    OrderBy => 94,
355                    // LIMIT/OFFSET are independent
356                    Limit | Offset => 93,
357                    // WITH/CTE can be removed if not referenced
358                    With => 92,
359
360                    // Tier 2: Semi-independent clauses
361                    // HAVING depends on aggregations, but often removable
362                    Having => 85,
363
364                    // Tier 3: Core dependent clauses (have circular dependencies)
365                    // GROUP BY and Projection (SELECT list) depend on each other
366                    // Removing these is riskier as they form the core query structure
367                    GroupBy => 70,
368                    Projection => 65,
369
370                    // FROM is most fundamental - almost everything depends on it
371                    From => 60,
372
373                    // Other fields get default priority
374                    _ => 80,
375                }
376            }
377
378            // Replace with subtree: Medium risk, good success rate
379            // Simplifies structure while preserving a valid subtree
380            ReductionOperation::Replace(_) => 55,
381
382            // Pullup: Medium-low risk, variable success rate
383            // Depends heavily on context compatibility
384            ReductionOperation::Pullup(_) => 40,
385
386            // Try NULL: High risk, lower success rate
387            // Often breaks type constraints or NOT NULL requirements
388            ReductionOperation::TryNull => 20,
389        }
390    }
391}
392
393/// A reduction candidate: a path to a node and the operation to apply.
394#[derive(Debug, Clone)]
395pub struct ReductionCandidate {
396    pub path: AstPath,
397    pub operation: ReductionOperation,
398}
399
400/// Generate all possible reduction candidates for a given AST.
401/// Systematically creates all viable reduction operations.
402pub fn generate_reduction_candidates(
403    root: &AstNode,
404    rules: &ReductionRules,
405    paths: &[AstPath],
406) -> Vec<ReductionCandidate> {
407    let mut candidates = Vec::new();
408
409    tracing::debug!("Generating reduction candidates for {} paths", paths.len());
410
411    for (path_idx, path) in paths.iter().enumerate() {
412        if let Some(node) = crate::sqlreduce::path::get_node_at_path(root, path) {
413            let node_type = ReductionRules::get_node_type(&node);
414            let path_str = crate::sqlreduce::path::display_ast_path(path);
415
416            tracing::debug!("Path {}: {} ({})", path_idx, path_str, node_type);
417
418            // Handle list/tuple removals (most important for reduction)
419            // Generate in reverse order so batch processing works correctly:
420            // removing higher indices first doesn't affect lower indices
421            match &node {
422                AstNode::SelectItemList(items) if items.len() > 1 => {
423                    tracing::debug!(
424                        "Adding {} RemoveListElement candidates for SelectItemList (reverse order)",
425                        items.len()
426                    );
427                    for i in (0..items.len()).rev() {
428                        candidates.push(ReductionCandidate {
429                            path: path.clone(),
430                            operation: ReductionOperation::RemoveListElement(i),
431                        });
432                    }
433                }
434                AstNode::ExprList(exprs) if exprs.len() > 1 => {
435                    tracing::debug!(
436                        "Adding {} RemoveListElement candidates for ExprList (reverse order)",
437                        exprs.len()
438                    );
439                    for i in (0..exprs.len()).rev() {
440                        candidates.push(ReductionCandidate {
441                            path: path.clone(),
442                            operation: ReductionOperation::RemoveListElement(i),
443                        });
444                    }
445                }
446                AstNode::TableList(tables) if tables.len() > 1 => {
447                    tracing::debug!(
448                        "Adding {} RemoveListElement candidates for TableList (reverse order)",
449                        tables.len()
450                    );
451                    for i in (0..tables.len()).rev() {
452                        candidates.push(ReductionCandidate {
453                            path: path.clone(),
454                            operation: ReductionOperation::RemoveListElement(i),
455                        });
456                    }
457                }
458                AstNode::OrderByList(orders) if orders.len() > 1 => {
459                    tracing::debug!(
460                        "Adding {} RemoveListElement candidates for OrderByList (reverse order)",
461                        orders.len()
462                    );
463                    for i in (0..orders.len()).rev() {
464                        candidates.push(ReductionCandidate {
465                            path: path.clone(),
466                            operation: ReductionOperation::RemoveListElement(i),
467                        });
468                    }
469                }
470                _ => {}
471            }
472
473            // Apply rule-based reductions
474            if let Some(rule) = rules.get_rule(&node_type) {
475                let mut rule_candidates = 0;
476
477                // Try null replacement
478                if rule.try_null {
479                    candidates.push(ReductionCandidate {
480                        path: path.clone(),
481                        operation: ReductionOperation::TryNull,
482                    });
483                    rule_candidates += 1;
484                }
485
486                // Try attribute removal
487                for attr in &rule.remove {
488                    candidates.push(ReductionCandidate {
489                        path: path.clone(),
490                        operation: ReductionOperation::Remove(attr.clone()),
491                    });
492                    rule_candidates += 1;
493                }
494
495                // Try pullup operations
496                for attr in &rule.pullup {
497                    candidates.push(ReductionCandidate {
498                        path: path.clone(),
499                        operation: ReductionOperation::Pullup(attr.clone()),
500                    });
501                    rule_candidates += 1;
502                }
503
504                // Try replace operations
505                for attr in &rule.replace {
506                    candidates.push(ReductionCandidate {
507                        path: path.clone(),
508                        operation: ReductionOperation::Replace(attr.clone()),
509                    });
510                    rule_candidates += 1;
511                }
512
513                if rule_candidates > 0 {
514                    tracing::debug!(
515                        "Added {} rule-based candidates for {}",
516                        rule_candidates,
517                        node_type
518                    );
519                }
520            } else {
521                tracing::debug!("No rules found for node type: {}", node_type);
522            }
523        }
524    }
525
526    tracing::debug!(
527        "Generated {} total candidates from {} paths (before sorting)",
528        candidates.len(),
529        paths.len()
530    );
531
532    // Sort candidates by dependency-aware priority (higher priority first)
533    // Independent clauses (WHERE, ORDER BY) tried first, then dependent ones (SELECT, GROUP BY)
534    candidates.sort_by(|a, b| b.operation.priority().cmp(&a.operation.priority()));
535
536    tracing::debug!(
537        "Sorted candidates by dependency-aware priority - first 10: {:?}",
538        candidates
539            .iter()
540            .take(10)
541            .map(|c| {
542                let op_desc = match &c.operation {
543                    ReductionOperation::Remove(field) => format!("Remove({:?})", field),
544                    op => format!("{:?}", op),
545                };
546                (c.operation.priority(), op_desc)
547            })
548            .collect::<Vec<_>>()
549    );
550
551    candidates
552}
553
554/// Apply a reduction operation to an AST node.
555/// Returns the new AST root if the operation was successful.
556pub fn apply_reduction_operation(
557    root: &AstNode,
558    candidate: &ReductionCandidate,
559) -> Option<AstNode> {
560    use crate::sqlreduce::path::{display_ast_path, get_node_at_path, set_node_at_path};
561
562    tracing::debug!(
563        "apply_reduction_operation: Trying to apply {:?} at path {}",
564        candidate.operation,
565        display_ast_path(&candidate.path)
566    );
567
568    let target_node = get_node_at_path(root, &candidate.path);
569    if target_node.is_none() {
570        tracing::debug!(
571            "apply_reduction_operation: Failed to get node at path {}",
572            display_ast_path(&candidate.path)
573        );
574        return None;
575    }
576    let target_node = target_node?;
577
578    match &candidate.operation {
579        ReductionOperation::TryNull => {
580            // Replace with NULL expression
581            let null_expr = AstNode::Expr(Expr::Value(Value::Null));
582            set_node_at_path(root, &candidate.path, Some(null_expr))
583        }
584
585        ReductionOperation::Remove(field) => {
586            // Remove an attribute (set to None)
587            let attr_path = [
588                candidate.path.clone(),
589                vec![PathComponent::field(field.clone())],
590            ]
591            .concat();
592            tracing::debug!(
593                "apply_reduction_operation: Removing attribute '{}' at path {}",
594                field.to_string(),
595                display_ast_path(&attr_path)
596            );
597
598            let result = set_node_at_path(root, &attr_path, None);
599            if result.is_none() {
600                tracing::debug!(
601                    "apply_reduction_operation: Failed to remove attribute '{}'",
602                    field.to_string()
603                );
604            } else {
605                tracing::debug!(
606                    "apply_reduction_operation: Successfully removed attribute '{}'",
607                    field.to_string()
608                );
609            }
610            result
611        }
612
613        ReductionOperation::Pullup(field) => {
614            // Pull up a subnode to replace the current node
615            let attr_path = [
616                candidate.path.clone(),
617                vec![PathComponent::field(field.clone())],
618            ]
619            .concat();
620            if let Some(subnode) = get_node_at_path(root, &attr_path) {
621                set_node_at_path(root, &candidate.path, Some(subnode))
622            } else {
623                None
624            }
625        }
626
627        ReductionOperation::Replace(field) => {
628            // Replace current node with a subtree
629            let attr_path = [
630                candidate.path.clone(),
631                vec![PathComponent::field(field.clone())],
632            ]
633            .concat();
634            tracing::debug!(
635                "apply_reduction_operation: Replacing with attribute '{}' from path {}",
636                field.to_string(),
637                display_ast_path(&attr_path)
638            );
639
640            if let Some(subtree) = get_node_at_path(root, &attr_path) {
641                tracing::debug!("apply_reduction_operation: Found subtree for replacement");
642                let result = set_node_at_path(root, &candidate.path, Some(subtree));
643                if result.is_none() {
644                    tracing::debug!("apply_reduction_operation: Failed to set replacement subtree");
645                } else {
646                    tracing::debug!(
647                        "apply_reduction_operation: Successfully replaced with subtree"
648                    );
649                }
650                result
651            } else {
652                tracing::debug!(
653                    "apply_reduction_operation: No subtree found at path {}",
654                    display_ast_path(&attr_path)
655                );
656                None
657            }
658        }
659
660        ReductionOperation::RemoveListElement(index) => {
661            // Remove an element from a list
662            match target_node {
663                AstNode::SelectItemList(mut items) => {
664                    if *index < items.len() && items.len() > 1 {
665                        items.remove(*index);
666                        set_node_at_path(
667                            root,
668                            &candidate.path,
669                            Some(AstNode::SelectItemList(items)),
670                        )
671                    } else {
672                        None
673                    }
674                }
675                AstNode::ExprList(mut exprs) => {
676                    if *index < exprs.len() && exprs.len() > 1 {
677                        exprs.remove(*index);
678                        set_node_at_path(root, &candidate.path, Some(AstNode::ExprList(exprs)))
679                    } else {
680                        None
681                    }
682                }
683                AstNode::TableList(mut tables) => {
684                    if *index < tables.len() && tables.len() > 1 {
685                        tables.remove(*index);
686                        set_node_at_path(root, &candidate.path, Some(AstNode::TableList(tables)))
687                    } else {
688                        None
689                    }
690                }
691                AstNode::OrderByList(mut orders) => {
692                    if *index < orders.len() && orders.len() > 1 {
693                        orders.remove(*index);
694                        set_node_at_path(root, &candidate.path, Some(AstNode::OrderByList(orders)))
695                    } else {
696                        None
697                    }
698                }
699                _ => None,
700            }
701        }
702    }
703}
704
705#[cfg(test)]
706mod tests {
707    use risingwave_sqlparser::parser::Parser;
708
709    use super::*;
710    use crate::sqlreduce::path::{enumerate_reduction_paths, statement_to_ast_node};
711
712    #[test]
713    fn test_reduction_candidates() {
714        let sql = "SELECT a, b, c FROM t1, t2;";
715        let parsed = Parser::parse_sql(sql).expect("Failed to parse SQL");
716        let stmt = &parsed[0];
717        let ast_node = statement_to_ast_node(stmt);
718
719        let paths = enumerate_reduction_paths(&ast_node, vec![]);
720        let rules = ReductionRules::default();
721        let candidates = generate_reduction_candidates(&ast_node, &rules, &paths);
722
723        // Should generate multiple candidates for removing SELECT items, FROM tables, etc.
724        assert!(!candidates.is_empty());
725        println!(
726            "Generated {} candidates for complex query",
727            candidates.len()
728        );
729    }
730
731    #[test]
732    fn test_list_element_removal() {
733        let sql = "SELECT a, b, c FROM t;";
734        let parsed = Parser::parse_sql(sql).expect("Failed to parse SQL");
735        let stmt = &parsed[0];
736        let ast_node = statement_to_ast_node(stmt);
737
738        let paths = enumerate_reduction_paths(&ast_node, vec![]);
739        let rules = ReductionRules::default();
740        let candidates = generate_reduction_candidates(&ast_node, &rules, &paths);
741
742        // Find candidates that remove SELECT list elements
743        let list_removals: Vec<_> = candidates
744            .iter()
745            .filter(|c| matches!(c.operation, ReductionOperation::RemoveListElement(_)))
746            .collect();
747
748        // Should find 3 list removal candidates (for a, b, c)
749        assert!(list_removals.len() == 3);
750        println!(
751            "✓ Found {} list element removal candidates as expected",
752            list_removals.len()
753        );
754    }
755}