risingwave_expr_impl/scalar/
to_char.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::{Debug, Write};
16use std::sync::LazyLock;
17
18use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
19use chrono::format::{Item, StrftimeItems};
20use chrono::{Datelike, NaiveDate};
21use risingwave_common::types::{Interval, Timestamp, Timestamptz};
22use risingwave_expr::{ExprError, Result, function};
23
24use super::timestamptz::time_zone_err;
25use crate::scalar::arithmetic_op::timestamp_interval_add;
26
/// A parsed chrono format: the list of [`chrono::format::Item`]s that a strftime-style template
/// string is split into. The items borrow from the template string for lifetime `'a`.
type Pattern<'a> = Vec<chrono::format::Item<'a>>;
28
29#[inline(always)]
30fn invalid_pattern_err() -> ExprError {
31    ExprError::InvalidParam {
32        name: "pattern",
33        reason: "invalid format specification for an interval value, HINT: Intervals are not tied to specific calendar dates.".into(),
34    }
35}
36
// `ChronoPattern` is a self-referential cell: it owns the chrono template `String` and, next to
// it, the `Pattern` (parsed items) that borrows from that string. The owner is read through
// `borrow_owner()` and the parsed items through `borrow_dependent()`.
self_cell::self_cell! {
    pub struct ChronoPattern {
        owner: String,
        #[covariant]
        dependent: Pattern,
    }
}
44
45impl Debug for ChronoPattern {
46    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
47        f.debug_struct("ChronoPattern")
48            .field("tmpl", self.borrow_owner())
49            .finish()
50    }
51}
52
53impl ChronoPattern {
54    /// Compile the pg pattern to chrono pattern.
55    // TODO: Chrono can not fully support the pg format, so consider using other implementations
56    // later.
57    pub fn compile(tmpl: &str) -> ChronoPattern {
58        // mapping from pg pattern to chrono pattern
59        // pg pattern: https://www.postgresql.org/docs/current/functions-formatting.html
60        // chrono pattern: https://docs.rs/chrono/latest/chrono/format/strftime/index.html
61        const PATTERNS: &[(&str, &str)] = &[
62            ("HH24", "%H"),
63            ("hh24", "%H"),
64            ("HH12", "%I"),
65            ("hh12", "%I"),
66            ("HH", "%I"),
67            ("hh", "%I"),
68            ("AM", "%p"),
69            ("PM", "%p"),
70            ("am", "%P"),
71            ("pm", "%P"),
72            ("MI", "%M"),
73            ("mi", "%M"),
74            ("SS", "%S"),
75            ("ss", "%S"),
76            ("YYYY", "%Y"),
77            ("yyyy", "%Y"),
78            ("YY", "%y"),
79            ("yy", "%y"),
80            ("IYYY", "%G"),
81            ("iyyy", "%G"),
82            ("IY", "%g"),
83            ("iy", "%g"),
84            ("MM", "%m"),
85            ("mm", "%m"),
86            ("Month", "%B"),
87            ("Mon", "%b"),
88            ("DD", "%d"),
89            ("dd", "%d"),
90            ("NS", "%9f"),
91            ("ns", "%9f"),
92            ("US", "%6f"),
93            ("us", "%6f"),
94            ("MS", "%3f"),
95            ("ms", "%3f"),
96            ("TZH:TZM", "%:z"),
97            ("tzh:tzm", "%:z"),
98            ("TZHTZM", "%z"),
99            ("tzhtzm", "%z"),
100            ("TZH", "%#z"),
101            ("tzh", "%#z"),
102        ];
103        // build an Aho-Corasick automaton for fast matching
104        static AC: LazyLock<AhoCorasick> = LazyLock::new(|| {
105            AhoCorasickBuilder::new()
106                .ascii_case_insensitive(false)
107                .match_kind(aho_corasick::MatchKind::LeftmostLongest)
108                .build(PATTERNS.iter().map(|(k, _)| k))
109                .expect("failed to build an Aho-Corasick automaton")
110        });
111
112        ChronoPattern::compile_inner(tmpl, PATTERNS, &AC)
113    }
114
115    pub fn compile_for_interval(tmpl: &str) -> ChronoPattern {
116        // mapping from pg pattern to chrono pattern
117        // pg pattern: https://www.postgresql.org/docs/current/functions-formatting.html
118        // chrono pattern: https://docs.rs/chrono/latest/chrono/format/strftime/index.html
119        const PATTERNS: &[(&str, &str)] = &[
120            ("HH24", "%H"),
121            ("hh24", "%H"),
122            ("HH12", "%I"),
123            ("hh12", "%I"),
124            ("HH", "%I"),
125            ("hh", "%I"),
126            ("AM", "%p"),
127            ("PM", "%p"),
128            ("am", "%P"),
129            ("pm", "%P"),
130            ("MI", "%M"),
131            ("mi", "%M"),
132            ("SS", "%S"),
133            ("ss", "%S"),
134            ("YYYY", "%Y"),
135            ("yyyy", "%Y"),
136            ("YY", "%y"),
137            ("yy", "%y"),
138            ("IYYY", "%G"),
139            ("iyyy", "%G"),
140            ("IY", "%g"),
141            ("iy", "%g"),
142            ("MM", "%m"),
143            ("mm", "%m"),
144            ("Month", "%B"),
145            ("Mon", "%b"),
146            ("DD", "%d"),
147            ("dd", "%d"),
148            ("US", "%.6f"), /* "%6f" and "%3f" are converted to private data structures in chrono, so we use "%.6f" and "%.3f" instead. */
149            ("us", "%.6f"),
150            ("MS", "%.3f"),
151            ("ms", "%.3f"),
152            ("TZH:TZM", "%:z"),
153            ("tzh:tzm", "%:z"),
154            ("TZHTZM", "%z"),
155            ("tzhtzm", "%z"),
156            ("TZH", "%#z"),
157            ("tzh", "%#z"),
158        ];
159        // build an Aho-Corasick automaton for fast matching
160        static AC: LazyLock<AhoCorasick> = LazyLock::new(|| {
161            AhoCorasickBuilder::new()
162                .ascii_case_insensitive(false)
163                .match_kind(aho_corasick::MatchKind::LeftmostLongest)
164                .build(PATTERNS.iter().map(|(k, _)| k))
165                .expect("failed to build an Aho-Corasick automaton")
166        });
167        ChronoPattern::compile_inner(tmpl, PATTERNS, &AC)
168    }
169
170    fn compile_inner(
171        tmpl: &str,
172        patterns: &[(&str, &str)],
173        ac: &LazyLock<AhoCorasick>,
174    ) -> ChronoPattern {
175        // replace all pg patterns with chrono patterns
176        let mut chrono_tmpl = String::new();
177        ac.replace_all_with(tmpl, &mut chrono_tmpl, |mat, _, dst| {
178            dst.push_str(patterns[mat.pattern()].1);
179            true
180        });
181        tracing::debug!(tmpl, chrono_tmpl, "compile_pattern_to_chrono");
182        ChronoPattern::new(chrono_tmpl, |tmpl| {
183            StrftimeItems::new(tmpl).collect::<Vec<_>>()
184        })
185    }
186}
187
188#[function(
189    "to_char(timestamp, varchar) -> varchar",
190    prebuild = "ChronoPattern::compile($1)"
191)]
192fn timestamp_to_char(data: Timestamp, pattern: &ChronoPattern, writer: &mut impl Write) {
193    let format = data.0.format_with_items(pattern.borrow_dependent().iter());
194    write!(writer, "{}", format).unwrap();
195}
196
// Declaration only: the body is intentionally empty and the signature is registered with the
// `rewritten` flag. Presumably calls to the two-argument timestamptz form are rewritten to
// another signature before execution; confirm against the `#[function]` macro documentation.
#[function("to_char(timestamptz, varchar) -> varchar", rewritten)]
fn _timestamptz_to_char() {}
199
200#[function(
201    "to_char(timestamptz, varchar, varchar) -> varchar",
202    prebuild = "ChronoPattern::compile($1)"
203)]
204fn timestamptz_to_char3(
205    data: Timestamptz,
206    zone: &str,
207    tmpl: &ChronoPattern,
208    writer: &mut impl Write,
209) -> Result<()> {
210    let format = data
211        .to_datetime_in_zone(Timestamptz::lookup_time_zone(zone).map_err(time_zone_err)?)
212        .format_with_items(tmpl.borrow_dependent().iter());
213    write!(writer, "{}", format).unwrap();
214    Ok(())
215}
216
217#[function(
218    "to_char(interval, varchar) -> varchar",
219    prebuild = "ChronoPattern::compile_for_interval($1)"
220)]
221fn interval_to_char(
222    interval: Interval,
223    pattern: &ChronoPattern,
224    writer: &mut impl Write,
225) -> Result<()> {
226    for iter in pattern.borrow_dependent() {
227        format_inner(writer, interval, iter)?;
228    }
229    Ok(())
230}
231
232fn adjust_to_iso_year(interval: Interval) -> Result<i32> {
233    let start = risingwave_common::types::Timestamp(
234        NaiveDate::from_ymd_opt(0, 1, 1)
235            .unwrap()
236            .and_hms_opt(0, 0, 0)
237            .unwrap(),
238    );
239    let interval = Interval::from_month_day_usec(interval.months(), interval.days(), 0);
240    let date = timestamp_interval_add(start, interval)?;
241    Ok(date.0.iso_week().year())
242}
243
244fn format_inner(w: &mut impl Write, interval: Interval, item: &Item<'_>) -> Result<()> {
245    match *item {
246        Item::Literal(s) | Item::Space(s) => {
247            w.write_str(s).unwrap();
248            Ok(())
249        }
250        Item::OwnedLiteral(ref s) | Item::OwnedSpace(ref s) => {
251            w.write_str(s).unwrap();
252            Ok(())
253        }
254        Item::Numeric(ref spec, _) => {
255            use chrono::format::Numeric::*;
256            match *spec {
257                Year => {
258                    let year = interval.years_field();
259                    if year < 0 {
260                        write!(w, "{:+05}", year).unwrap();
261                    } else {
262                        write!(w, "{:04}", year).unwrap();
263                    }
264                }
265                YearMod100 => {
266                    let year = interval.years_field();
267                    if year % 100 < 0 {
268                        let year = -((-year) % 100);
269                        write!(w, "{:+03}", year).unwrap();
270                    } else {
271                        let year = year % 100;
272                        write!(w, "{:02}", year).unwrap();
273                    }
274                }
275                IsoYear => {
276                    let iso_year = adjust_to_iso_year(interval)?;
277                    if interval.years_field() < 0 {
278                        write!(w, "{:+05}", iso_year).unwrap();
279                    } else {
280                        write!(w, "{:04}", iso_year).unwrap();
281                    }
282                }
283                IsoYearMod100 => {
284                    let iso_year = adjust_to_iso_year(interval)?;
285                    if interval.years_field() % 100 < 0 {
286                        let iso_year = -((-iso_year) % 100);
287                        write!(w, "{:+03}", iso_year).unwrap();
288                    } else {
289                        let iso_year = iso_year % 100;
290                        write!(w, "{:02}", iso_year).unwrap();
291                    }
292                }
293                Month => {
294                    let month = interval.months_field();
295                    if month < 0 {
296                        write!(w, "{:+03}", month).unwrap();
297                    } else {
298                        write!(w, "{:02}", month).unwrap();
299                    }
300                }
301                Day => {
302                    let day = interval.days_field();
303                    if day < 0 {
304                        write!(w, "{:+02}", day).unwrap();
305                    } else {
306                        write!(w, "{:02}", day).unwrap();
307                    }
308                }
309                Hour => {
310                    let hour = interval.hours_field();
311                    if hour < 0 {
312                        write!(w, "{:+03}", hour).unwrap();
313                    } else {
314                        write!(w, "{:02}", hour).unwrap();
315                    }
316                }
317                Hour12 => {
318                    let hour = interval.hours_field();
319                    if hour < 0 {
320                        // here to align with postgres, we format -0 as 012.
321                        let hour = -(-hour) % 12;
322                        if hour == 0 {
323                            w.write_str("012").unwrap();
324                        } else {
325                            write!(w, "{:+03}", hour).unwrap();
326                        }
327                    } else {
328                        let hour = if hour % 12 == 0 { 12 } else { hour % 12 };
329                        write!(w, "{:02}", hour).unwrap();
330                    }
331                }
332                Minute => {
333                    let minute = interval.usecs() / 1_000_000 / 60;
334                    if minute % 60 < 0 {
335                        let minute = -((-minute) % 60);
336                        write!(w, "{:+03}", minute).unwrap();
337                    } else {
338                        let minute = minute % 60;
339                        write!(w, "{:02}", minute).unwrap();
340                    }
341                }
342                Second => {
343                    let second = interval.usecs() / 1_000_000;
344                    if second % 60 < 0 {
345                        let second = -((-second) % 60);
346                        write!(w, "{:+03}", second).unwrap();
347                    } else {
348                        let second = second % 60;
349                        write!(w, "{:02}", second).unwrap();
350                    }
351                }
352                Nanosecond | Ordinal | WeekdayFromMon | NumDaysFromSun | IsoWeek | WeekFromSun
353                | WeekFromMon | IsoYearDiv100 | Timestamp | YearDiv100 | Internal(_) => {
354                    unreachable!()
355                }
356                _ => unreachable!(),
357            }
358            Ok(())
359        }
360        Item::Fixed(ref spec) => {
361            use chrono::format::Fixed::*;
362            match *spec {
363                LowerAmPm => {
364                    if interval.hours_field() % 24 >= 12 {
365                        w.write_str("pm").unwrap();
366                    } else {
367                        w.write_str("am").unwrap();
368                    }
369                    Ok(())
370                }
371                UpperAmPm => {
372                    if interval.hours_field() % 24 >= 12 {
373                        w.write_str("PM").unwrap();
374                    } else {
375                        w.write_str("AM").unwrap();
376                    }
377                    Ok(())
378                }
379                Nanosecond3 => {
380                    let usec = interval.usecs() % 1_000_000;
381                    write!(w, "{:03}", usec / 1000).unwrap();
382                    Ok(())
383                }
384                Nanosecond6 => {
385                    let usec = interval.usecs() % 1_000_000;
386                    write!(w, "{:06}", usec).unwrap();
387                    Ok(())
388                }
389                Internal(_) | ShortMonthName | LongMonthName | TimezoneOffset | TimezoneOffsetZ
390                | TimezoneOffsetColon => Err(invalid_pattern_err()),
391                ShortWeekdayName
392                | LongWeekdayName
393                | TimezoneName
394                | TimezoneOffsetDoubleColon
395                | TimezoneOffsetTripleColon
396                | TimezoneOffsetColonZ
397                | Nanosecond
398                | Nanosecond9
399                | RFC2822
400                | RFC3339 => unreachable!(),
401                _ => unreachable!(),
402            }
403        }
404        Item::Error => Err(invalid_pattern_err()),
405    }
406}