risingwave_expr_impl/scalar/
extract.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::str::FromStr;
16
17use chrono::{Datelike, NaiveTime, Timelike};
18use risingwave_common::types::{Date, Decimal, F64, Interval, Time, Timestamp, Timestamptz};
19use risingwave_expr::{ExprError, Result, function};
20
21use self::Unit::*;
22use crate::scalar::timestamptz::time_zone_err;
23
24/// Extract field from `Datelike`.
25fn extract_from_datelike(date: impl Datelike, unit: Unit) -> Decimal {
26    match unit {
27        Millennium => ((date.year() - 1) / 1000 + 1).into(),
28        Century => ((date.year() - 1) / 100 + 1).into(),
29        Decade => (date.year() / 10).into(),
30        Year => date.year().into(),
31        IsoYear => date.iso_week().year().into(),
32        Quarter => ((date.month() - 1) / 3 + 1).into(),
33        Month => date.month().into(),
34        Week => date.iso_week().week().into(),
35        Day => date.day().into(),
36        Doy => date.ordinal().into(),
37        Dow => date.weekday().num_days_from_sunday().into(),
38        IsoDow => date.weekday().number_from_monday().into(),
39        u => unreachable!("invalid unit {:?} for date", u),
40    }
41}
42
43/// Extract field from `Timelike`.
44fn extract_from_timelike(time: impl Timelike, unit: Unit) -> Decimal {
45    let nanos = || time.second() as u64 * 1_000_000_000 + time.nanosecond() as u64;
46    match unit {
47        Hour => time.hour().into(),
48        Minute => time.minute().into(),
49        Second => Decimal::from_i128_with_scale(nanos() as i128, 9),
50        Millisecond => Decimal::from_i128_with_scale(nanos() as i128, 6),
51        Microsecond => Decimal::from_i128_with_scale(nanos() as i128, 3),
52        Nanosecond => nanos().into(),
53        Epoch => {
54            let nanos =
55                time.num_seconds_from_midnight() as u64 * 1_000_000_000 + time.nanosecond() as u64;
56            Decimal::from_i128_with_scale(nanos as i128, 9)
57        }
58        u => unreachable!("invalid unit {:?} for time", u),
59    }
60}
61
62#[function(
63    "extract(varchar, date) -> decimal",
64    prebuild = "Unit::from_str($0)?.ensure_date()?"
65)]
66fn extract_from_date(date: Date, unit: &Unit) -> Decimal {
67    match unit {
68        Epoch => {
69            let epoch = date.0.and_time(NaiveTime::default()).and_utc().timestamp();
70            epoch.into()
71        }
72        Julian => {
73            const UNIX_EPOCH_DAY: i32 = 719_163;
74            let julian = date.0.num_days_from_ce() - UNIX_EPOCH_DAY + 2_440_588;
75            julian.into()
76        }
77        _ => extract_from_datelike(date.0, *unit),
78    }
79}
80
81#[function(
82    "extract(varchar, time) -> decimal",
83    prebuild = "Unit::from_str($0)?.ensure_time()?"
84)]
85fn extract_from_time(time: Time, unit: &Unit) -> Decimal {
86    extract_from_timelike(time.0, *unit)
87}
88
89#[function(
90    "extract(varchar, timestamp) -> decimal",
91    prebuild = "Unit::from_str($0)?.ensure_timestamp()?"
92)]
93fn extract_from_timestamp(timestamp: Timestamp, unit: &Unit) -> Decimal {
94    match unit {
95        Epoch => {
96            if let Some(nanos) = timestamp.0.and_utc().timestamp_nanos_opt() {
97                Decimal::from_i128_with_scale(nanos as i128, 9)
98            } else {
99                let micros = timestamp.0.and_utc().timestamp_micros();
100                Decimal::from_i128_with_scale(micros as i128, 6)
101            }
102        }
103        Julian => {
104            let epoch = if let Some(nanos) = timestamp.0.and_utc().timestamp_nanos_opt() {
105                Decimal::from_i128_with_scale(nanos as i128, 9)
106            } else {
107                let epoch = timestamp.0.and_utc().timestamp_micros();
108                Decimal::from_i128_with_scale(epoch as i128, 6)
109            };
110            epoch / (24 * 60 * 60).into() + 2_440_588.into()
111        }
112        _ if unit.is_date_unit() => extract_from_datelike(timestamp.0.date(), *unit),
113        _ if unit.is_time_unit() => extract_from_timelike(timestamp.0.time(), *unit),
114        u => unreachable!("invalid unit {:?} for timestamp", u),
115    }
116}
117
118#[function(
119    "extract(varchar, timestamptz) -> decimal",
120    prebuild = "Unit::from_str($0)?.ensure_timestamptz()?"
121)]
122fn extract_from_timestamptz(tz: Timestamptz, unit: &Unit) -> Decimal {
123    match unit {
124        Epoch => Decimal::from_i128_with_scale(tz.timestamp_micros() as _, 6),
125        // TODO(#5826): all other units depend on implicit session TimeZone
126        u => unreachable!("invalid unit {u:?} for timestamp with time zone"),
127    }
128}
129
130#[function(
131    "extract(varchar, timestamptz, varchar) -> decimal",
132    prebuild = "Unit::from_str($0)?.ensure_timestamptz_at_timezone()?"
133)]
134fn extract_from_timestamptz_at_timezone(
135    input: Timestamptz,
136    timezone: &str,
137    unit: &Unit,
138) -> Result<Decimal> {
139    use chrono::Offset as _;
140
141    let time_zone = Timestamptz::lookup_time_zone(timezone).map_err(time_zone_err)?;
142    let instant_local = input.to_datetime_in_zone(time_zone);
143
144    Ok(match unit {
145        Epoch => Decimal::from_i128_with_scale(instant_local.timestamp_micros() as _, 6),
146        Timezone => {
147            let east_secs = instant_local.offset().fix().local_minus_utc();
148            east_secs.into()
149        }
150        Timezone_Hour => {
151            let east_secs = instant_local.offset().fix().local_minus_utc();
152            (east_secs / 3600).into()
153        }
154        Timezone_Minute => {
155            let east_secs = instant_local.offset().fix().local_minus_utc();
156            (east_secs % 3600 / 60).into()
157        }
158        _ => extract_from_timestamp(instant_local.naive_local().into(), unit),
159    })
160}
161
162#[function(
163    "extract(varchar, interval) -> decimal",
164    prebuild = "Unit::from_str($0)?.ensure_interval()?"
165)]
166fn extract_from_interval(interval: Interval, unit: &Unit) -> Decimal {
167    match unit {
168        Millennium => (interval.years_field() / 1000).into(),
169        Century => (interval.years_field() / 100).into(),
170        Decade => (interval.years_field() / 10).into(),
171        Year => interval.years_field().into(),
172        Quarter => (interval.months_field() / 3 + 1).into(),
173        Month => interval.months_field().into(),
174        Day => interval.days_field().into(),
175        Hour => interval.hours_field().into(),
176        Minute => interval.minutes_field().into(),
177        Second => Decimal::from_i128_with_scale(interval.seconds_in_micros() as i128, 6),
178        Millisecond => Decimal::from_i128_with_scale(interval.seconds_in_micros() as i128, 3),
179        Microsecond => interval.seconds_in_micros().into(),
180        Epoch => Decimal::from_i128_with_scale(interval.epoch_in_micros(), 6),
181        u => unreachable!("invalid unit {:?} for interval", u),
182    }
183}
184
185#[function(
186    "date_part(varchar, date) -> float8",
187    prebuild = "Unit::from_str($0)?.ensure_date()?"
188)]
189fn date_part_from_date(date: Date, unit: &Unit) -> Result<F64> {
190    // date_part of date manually cast to timestamp
191    // https://github.com/postgres/postgres/blob/REL_15_2/src/backend/catalog/system_functions.sql#L123
192    extract_from_timestamp(date.into(), unit)
193        .try_into()
194        .map_err(|_| ExprError::NumericOutOfRange)
195}
196
197#[function(
198    "date_part(varchar, time) -> float8",
199    prebuild = "Unit::from_str($0)?.ensure_time()?"
200)]
201fn date_part_from_time(time: Time, unit: &Unit) -> Result<F64> {
202    extract_from_time(time, unit)
203        .try_into()
204        .map_err(|_| ExprError::NumericOutOfRange)
205}
206
207#[function(
208    "date_part(varchar, timestamp) -> float8",
209    prebuild = "Unit::from_str($0)?.ensure_timestamp()?"
210)]
211fn date_part_from_timestamp(timestamp: Timestamp, unit: &Unit) -> Result<F64> {
212    extract_from_timestamp(timestamp, unit)
213        .try_into()
214        .map_err(|_| ExprError::NumericOutOfRange)
215}
216
217#[function(
218    "date_part(varchar, timestamptz) -> float8",
219    prebuild = "Unit::from_str($0)?.ensure_timestamptz()?"
220)]
221fn date_part_from_timestamptz(input: Timestamptz, unit: &Unit) -> Result<F64> {
222    extract_from_timestamptz(input, unit)
223        .try_into()
224        .map_err(|_| ExprError::NumericOutOfRange)
225}
226
227#[function(
228    "date_part(varchar, timestamptz, varchar) -> float8",
229    prebuild = "Unit::from_str($0)?.ensure_timestamptz_at_timezone()?"
230)]
231fn date_part_from_timestamptz_at_timezone(
232    input: Timestamptz,
233    timezone: &str,
234    unit: &Unit,
235) -> Result<F64> {
236    extract_from_timestamptz_at_timezone(input, timezone, unit)?
237        .try_into()
238        .map_err(|_| ExprError::NumericOutOfRange)
239}
240
241#[function(
242    "date_part(varchar, interval) -> float8",
243    prebuild = "Unit::from_str($0)?.ensure_interval()?"
244)]
245fn date_part_from_interval(interval: Interval, unit: &Unit) -> Result<F64> {
246    extract_from_interval(interval, unit)
247        .try_into()
248        .map_err(|_| ExprError::NumericOutOfRange)
249}
250
251/// Define an enum and its `FromStr` impl.
252macro_rules! define_unit {
253    ($(#[ $attr:meta ])* enum $name:ident { $($variant:ident,)* }) => {
254        $(#[$attr])*
255        #[derive(Debug, PartialEq, Eq, Clone, Copy)]
256        enum $name {
257            $($variant,)*
258        }
259
260        impl FromStr for $name {
261            type Err = ExprError;
262
263            fn from_str(s: &str) -> Result<Self> {
264                $(
265                    if s.eq_ignore_ascii_case(stringify!($variant)) {
266                        return Ok(Self::$variant);
267                    }
268                )*
269                Err(invalid_unit(s))
270            }
271        }
272    };
273}
274
275define_unit! {
276    /// Datetime units.
277    #[allow(non_camel_case_types)]
278    enum Unit {
279        Millennium,
280        Century,
281        Decade,
282        Year,
283        IsoYear,
284        Quarter,
285        Month,
286        Week,
287        Day,
288        Doy,
289        Dow,
290        IsoDow,
291        Hour,
292        Minute,
293        Second,
294        Millisecond,
295        Microsecond,
296        Nanosecond,
297        Epoch,
298        Julian,
299        Timezone,
300        Timezone_Hour,
301        Timezone_Minute,
302    }
303}
304
305impl Unit {
306    /// Whether the unit is a valid date unit.
307    #[rustfmt::skip]
308    const fn is_date_unit(self) -> bool {
309        matches!(
310            self,
311            Millennium | Century | Decade | Year | IsoYear | Quarter | Month | Week
312            | Day | Doy | Dow | IsoDow | Epoch | Julian
313        )
314    }
315
316    /// Whether the unit is a valid time unit.
317    const fn is_time_unit(self) -> bool {
318        matches!(
319            self,
320            Hour | Minute | Second | Millisecond | Microsecond | Nanosecond | Epoch
321        )
322    }
323
324    /// Whether the unit is a valid timestamp unit.
325    const fn is_timestamp_unit(self) -> bool {
326        self.is_date_unit() || self.is_time_unit()
327    }
328
329    /// Whether the unit is a valid timestamptz unit.
330    const fn is_timestamptz_unit(self) -> bool {
331        matches!(self, Epoch)
332    }
333
334    /// Whether the unit is a valid timestamptz at timezone unit.
335    const fn is_timestamptz_at_timezone_unit(self) -> bool {
336        self.is_timestamp_unit() || matches!(self, Timezone | Timezone_Hour | Timezone_Minute)
337    }
338
339    /// Whether the unit is a valid interval unit.
340    #[rustfmt::skip]
341    const fn is_interval_unit(self) -> bool {
342        matches!(
343            self,
344            Millennium | Century | Decade | Year | Quarter | Month | Day | Hour | Minute
345            | Second | Millisecond | Microsecond | Epoch
346        )
347    }
348
349    /// Ensure the unit is a valid date unit.
350    fn ensure_date(self) -> Result<Self> {
351        if self.is_date_unit() {
352            Ok(self)
353        } else {
354            Err(unsupported_unit(self, "date"))
355        }
356    }
357
358    /// Ensure the unit is a valid time unit.
359    fn ensure_time(self) -> Result<Self> {
360        if self.is_time_unit() {
361            Ok(self)
362        } else {
363            Err(unsupported_unit(self, "time"))
364        }
365    }
366
367    /// Ensure the unit is a valid timestamp unit.
368    fn ensure_timestamp(self) -> Result<Self> {
369        if self.is_timestamp_unit() {
370            Ok(self)
371        } else {
372            Err(unsupported_unit(self, "timestamp"))
373        }
374    }
375
376    /// Ensure the unit is a valid timestamptz unit.
377    fn ensure_timestamptz(self) -> Result<Self> {
378        if self.is_timestamptz_unit() {
379            Ok(self)
380        } else {
381            Err(unsupported_unit(self, "timestamp with time zone"))
382        }
383    }
384
385    /// Ensure the unit is a valid timestamptz unit.
386    fn ensure_timestamptz_at_timezone(self) -> Result<Self> {
387        if self.is_timestamptz_at_timezone_unit() {
388            Ok(self)
389        } else {
390            Err(unsupported_unit(self, "timestamp with time zone"))
391        }
392    }
393
394    /// Ensure the unit is a valid interval unit.
395    fn ensure_interval(self) -> Result<Self> {
396        if self.is_interval_unit() {
397            Ok(self)
398        } else {
399            Err(unsupported_unit(self, "interval"))
400        }
401    }
402}
403
404fn invalid_unit(unit: &str) -> ExprError {
405    ExprError::InvalidParam {
406        name: "unit",
407        reason: format!("unit \"{unit}\" not recognized").into(),
408    }
409}
410
411fn unsupported_unit(unit: Unit, type_: &str) -> ExprError {
412    ExprError::InvalidParam {
413        name: "unit",
414        reason: format!("unit \"{unit:?}\" not supported for type {type_}").into(),
415    }
416}
417
418#[cfg(test)]
419mod tests {
420    use chrono::{NaiveDate, NaiveDateTime};
421
422    use super::*;
423
424    #[test]
425    fn test_extract_from_date() {
426        let date = Date::new(NaiveDate::parse_from_str("2021-11-22", "%Y-%m-%d").unwrap());
427        let extract = |i| extract_from_date(date, &i).to_string();
428        assert_eq!(extract(Day), "22");
429        assert_eq!(extract(Month), "11");
430        assert_eq!(extract(Year), "2021");
431        assert_eq!(extract(Dow), "1");
432        assert_eq!(extract(Doy), "326");
433        assert_eq!(extract(Millennium), "3");
434        assert_eq!(extract(Century), "21");
435        assert_eq!(extract(Decade), "202");
436        assert_eq!(extract(IsoYear), "2021");
437        assert_eq!(extract(Quarter), "4");
438        assert_eq!(extract(Week), "47");
439        assert_eq!(extract(IsoDow), "1");
440        assert_eq!(extract(Epoch), "1637539200");
441        assert_eq!(extract(Julian), "2459541");
442    }
443
444    #[test]
445    fn test_extract_from_time() {
446        let time: Time = "23:22:57.123450".parse().unwrap();
447        let extract = |unit| extract_from_time(time, &unit).to_string();
448        assert_eq!(extract(Hour), "23");
449        assert_eq!(extract(Minute), "22");
450        assert_eq!(extract(Second), "57.123450000");
451        assert_eq!(extract(Millisecond), "57123.450000");
452        assert_eq!(extract(Microsecond), "57123450.000");
453        assert_eq!(extract(Epoch), "84177.123450000");
454    }
455
456    #[test]
457    fn test_extract_from_timestamp() {
458        let ts = Timestamp::new(
459            NaiveDateTime::parse_from_str("2021-11-22 12:4:2.575400", "%Y-%m-%d %H:%M:%S%.f")
460                .unwrap(),
461        );
462        let extract = |unit| extract_from_timestamp(ts, &unit).to_string();
463        assert_eq!(extract(Millennium), "3");
464        assert_eq!(extract(Century), "21");
465        assert_eq!(extract(Decade), "202");
466        assert_eq!(extract(IsoYear), "2021");
467        assert_eq!(extract(Year), "2021");
468        assert_eq!(extract(Quarter), "4");
469        assert_eq!(extract(Month), "11");
470        assert_eq!(extract(Week), "47");
471        assert_eq!(extract(Day), "22");
472        assert_eq!(extract(Dow), "1");
473        assert_eq!(extract(IsoDow), "1");
474        assert_eq!(extract(Doy), "326");
475        assert_eq!(extract(Hour), "12");
476        assert_eq!(extract(Minute), "4");
477        assert_eq!(extract(Second), "2.575400000");
478        assert_eq!(extract(Millisecond), "2575.400000");
479        assert_eq!(extract(Microsecond), "2575400.000");
480        assert_eq!(extract(Epoch), "1637582642.575400000");
481        assert_eq!(extract(Julian), "2459541.5028075856481481481481");
482    }
483
484    #[test]
485    fn test_extract_from_timestamptz() {
486        let ts: Timestamptz = "2023-06-01 00:00:00Z".parse().unwrap();
487        let extract = |unit| {
488            extract_from_timestamptz_at_timezone(ts, "pst8pdt", &unit)
489                .unwrap()
490                .to_string()
491        };
492        assert_eq!(extract(Timezone), "-25200");
493        assert_eq!(extract(Timezone_Hour), "-7");
494        assert_eq!(extract(Timezone_Minute), "0");
495    }
496
497    #[test]
498    fn test_extract_from_interval() {
499        let interval: Interval = "2345 years 1 mon 250 days 23:22:57.123450".parse().unwrap();
500        let extract = |unit| extract_from_interval(interval, &unit).to_string();
501        assert_eq!(extract(Millennium), "2");
502        assert_eq!(extract(Century), "23");
503        assert_eq!(extract(Decade), "234");
504        assert_eq!(extract(Year), "2345");
505        assert_eq!(extract(Month), "1");
506        assert_eq!(extract(Day), "250");
507        assert_eq!(extract(Hour), "23");
508        assert_eq!(extract(Minute), "22");
509        assert_eq!(extract(Second), "57.123450");
510        assert_eq!(extract(Millisecond), "57123.450");
511        assert_eq!(extract(Microsecond), "57123450");
512        assert_eq!(extract(Epoch), "74026848177.123450");
513
514        let interval: Interval = "-2345 years -1 mon -250 days -23:22:57.123450"
515            .parse()
516            .unwrap();
517        let extract = |unit| extract_from_interval(interval, &unit).to_string();
518        assert_eq!(extract(Millennium), "-2");
519        assert_eq!(extract(Century), "-23");
520        assert_eq!(extract(Decade), "-234");
521        assert_eq!(extract(Year), "-2345");
522        assert_eq!(extract(Month), "-1");
523        assert_eq!(extract(Day), "-250");
524        assert_eq!(extract(Hour), "-23");
525        assert_eq!(extract(Minute), "-22");
526        assert_eq!(extract(Second), "-57.123450");
527        assert_eq!(extract(Millisecond), "-57123.450");
528        assert_eq!(extract(Microsecond), "-57123450");
529        assert_eq!(extract(Epoch), "-74026848177.123450");
530    }
531}