Skip to content

prefect.server.events.counting

InvalidEventCountParameters

Bases: ValueError

Raised when the given parameters are invalid for counting events.

Source code in src/prefect/server/events/counting.py
24
25
26
27
28
29
class InvalidEventCountParameters(ValueError):
    """Raised when the given parameters are invalid for counting events."""

    def __init__(self, message: str):
        super().__init__(message)
        self.message = message

TimeUnit

Bases: AutoEnum

Source code in src/prefect/server/events/counting.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
class TimeUnit(AutoEnum):
    week = AutoEnum.auto()
    day = AutoEnum.auto()
    hour = AutoEnum.auto()
    minute = AutoEnum.auto()
    second = AutoEnum.auto()

    def as_timedelta(self, interval) -> pendulum.Duration:
        if self == self.week:
            return pendulum.Duration(days=7 * interval)
        elif self == self.day:
            return pendulum.Duration(days=1 * interval)
        elif self == self.hour:
            return pendulum.Duration(hours=1 * interval)
        elif self == self.minute:
            return pendulum.Duration(minutes=1 * interval)
        elif self == self.second:
            return pendulum.Duration(seconds=1 * interval)
        else:
            raise NotImplementedError()

    def validate_buckets(
        self, start_datetime: DateTime, end_datetime: DateTime, interval: float
    ):
        MAX_ALLOWED_BUCKETS = 1000

        delta = self.as_timedelta(interval)
        start_in_utc = start_datetime.in_timezone("UTC")
        end_in_utc = end_datetime.in_timezone("UTC")

        if interval < 0.01:
            raise InvalidEventCountParameters("The minimum interval is 0.01")

        number_of_buckets = math.ceil((end_in_utc - start_in_utc) / delta)
        if number_of_buckets > MAX_ALLOWED_BUCKETS:
            raise InvalidEventCountParameters(
                f"The given interval would create {number_of_buckets} buckets, "
                "which is too many. Please increase the interval or reduce the "
                f"time range to produce {MAX_ALLOWED_BUCKETS} buckets or fewer."
            )

    def get_interval_spans(
        self,
        start_datetime: DateTime,
        end_datetime: DateTime,
        interval: float,
    ):
        """Divide the given range of dates into evenly-sized spans of interval units"""
        self.validate_buckets(start_datetime, end_datetime, interval)

        # Our universe began on PIVOT_DATETIME and all time after that is
        # divided into `delta`-sized buckets. We want to find the bucket that
        # contains `start_datetime` and then find the all of the buckets
        # that come after it until the bucket that contains `end_datetime`.

        delta = self.as_timedelta(interval)
        start_in_utc = start_datetime.in_timezone("UTC")
        end_in_utc = end_datetime.in_timezone("UTC")

        if end_in_utc > pendulum.now("UTC"):
            end_in_utc = pendulum.now("UTC").end_of(self.value)

        first_span_index = math.floor((start_in_utc - PIVOT_DATETIME) / delta)

        yield first_span_index

        span_start = PIVOT_DATETIME + delta * first_span_index

        while span_start < end_in_utc:
            next_span_start = span_start + delta
            yield (span_start, next_span_start - timedelta(microseconds=1))
            span_start = next_span_start

    def database_value_expression(self, time_interval: float):
        """Returns the SQL expression to place an event in a time bucket"""
        # The date_bin function can do the bucketing for us:
        # https://www.postgresql.org/docs/14/functions-datetime.html#FUNCTIONS-DATETIME-BIN
        db = provide_database_interface()
        delta = self.as_timedelta(time_interval)
        if db.dialect.name == "postgresql":
            return sa.cast(
                sa.func.floor(
                    sa.extract(
                        "epoch",
                        (
                            sa.func.date_bin(delta, db.Event.occurred, PIVOT_DATETIME)
                            - PIVOT_DATETIME
                        ),
                    )
                    / delta.total_seconds(),
                ),
                sa.Text,
            )
        elif db.dialect.name == "sqlite":
            # Convert pivot date and event date to strings formatted as seconds since the epoch
            pivot_timestamp = sa.func.strftime(
                "%s", PIVOT_DATETIME.strftime("%Y-%m-%d %H:%M:%S")
            )
            event_timestamp = sa.func.strftime("%s", db.Event.occurred)
            seconds_since_pivot = event_timestamp - pivot_timestamp
            # Calculate the bucket index by dividing by the interval in seconds and flooring the result
            bucket_index = sa.func.floor(
                sa.cast(seconds_since_pivot, sa.Integer) / delta.total_seconds()
            )
            return sa.cast(bucket_index, sa.Text)
        else:
            raise NotImplementedError(f"Dialect {db.dialect.name} is not supported.")

    def database_label_expression(self, db: PrefectDBInterface, time_interval: float):
        """Returns the SQL expression to label a time bucket"""
        time_delta = self.as_timedelta(time_interval)
        if db.dialect.name == "postgresql":
            # The date_bin function can do the bucketing for us:
            # https://www.postgresql.org/docs/14/functions-datetime.html#FUNCTIONS-DATETIME-BIN
            return sa.func.to_char(
                sa.func.date_bin(time_delta, db.Event.occurred, PIVOT_DATETIME),
                'YYYY-MM-DD"T"HH24:MI:SSTZH:TZM',
            )
        elif db.dialect.name == "sqlite":
            # We can't use date_bin in SQLite, so we have to do the bucketing manually
            seconds_since_epoch = sa.func.strftime("%s", db.Event.occurred)
            # Convert the total seconds of the timedelta to a constant in SQL
            bucket_size = time_delta.total_seconds()
            # Perform integer division and multiplication to find the bucket start epoch using SQL functions
            bucket_start_epoch = sa.func.cast(
                (sa.cast(seconds_since_epoch, sa.Integer) / bucket_size) * bucket_size,
                sa.Integer,
            )
            bucket_datetime = sa.func.strftime(
                "%Y-%m-%dT%H:%M:%SZ", sa.func.datetime(bucket_start_epoch, "unixepoch")
            )
            return bucket_datetime
        else:
            raise NotImplementedError(f"Dialect {db.dialect.name} is not supported.")

database_label_expression(db, time_interval)

Returns the SQL expression to label a time bucket

Source code in src/prefect/server/events/counting.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
def database_label_expression(self, db: PrefectDBInterface, time_interval: float):
    """Returns the SQL expression to label a time bucket"""
    time_delta = self.as_timedelta(time_interval)
    if db.dialect.name == "postgresql":
        # The date_bin function can do the bucketing for us:
        # https://www.postgresql.org/docs/14/functions-datetime.html#FUNCTIONS-DATETIME-BIN
        return sa.func.to_char(
            sa.func.date_bin(time_delta, db.Event.occurred, PIVOT_DATETIME),
            'YYYY-MM-DD"T"HH24:MI:SSTZH:TZM',
        )
    elif db.dialect.name == "sqlite":
        # We can't use date_bin in SQLite, so we have to do the bucketing manually
        seconds_since_epoch = sa.func.strftime("%s", db.Event.occurred)
        # Convert the total seconds of the timedelta to a constant in SQL
        bucket_size = time_delta.total_seconds()
        # Perform integer division and multiplication to find the bucket start epoch using SQL functions
        bucket_start_epoch = sa.func.cast(
            (sa.cast(seconds_since_epoch, sa.Integer) / bucket_size) * bucket_size,
            sa.Integer,
        )
        bucket_datetime = sa.func.strftime(
            "%Y-%m-%dT%H:%M:%SZ", sa.func.datetime(bucket_start_epoch, "unixepoch")
        )
        return bucket_datetime
    else:
        raise NotImplementedError(f"Dialect {db.dialect.name} is not supported.")

database_value_expression(time_interval)

Returns the SQL expression to place an event in a time bucket

Source code in src/prefect/server/events/counting.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def database_value_expression(self, time_interval: float):
    """Returns the SQL expression to place an event in a time bucket"""
    # The date_bin function can do the bucketing for us:
    # https://www.postgresql.org/docs/14/functions-datetime.html#FUNCTIONS-DATETIME-BIN
    db = provide_database_interface()
    delta = self.as_timedelta(time_interval)
    if db.dialect.name == "postgresql":
        return sa.cast(
            sa.func.floor(
                sa.extract(
                    "epoch",
                    (
                        sa.func.date_bin(delta, db.Event.occurred, PIVOT_DATETIME)
                        - PIVOT_DATETIME
                    ),
                )
                / delta.total_seconds(),
            ),
            sa.Text,
        )
    elif db.dialect.name == "sqlite":
        # Convert pivot date and event date to strings formatted as seconds since the epoch
        pivot_timestamp = sa.func.strftime(
            "%s", PIVOT_DATETIME.strftime("%Y-%m-%d %H:%M:%S")
        )
        event_timestamp = sa.func.strftime("%s", db.Event.occurred)
        seconds_since_pivot = event_timestamp - pivot_timestamp
        # Calculate the bucket index by dividing by the interval in seconds and flooring the result
        bucket_index = sa.func.floor(
            sa.cast(seconds_since_pivot, sa.Integer) / delta.total_seconds()
        )
        return sa.cast(bucket_index, sa.Text)
    else:
        raise NotImplementedError(f"Dialect {db.dialect.name} is not supported.")

get_interval_spans(start_datetime, end_datetime, interval)

Divide the given range of dates into evenly-sized spans of interval units

Source code in src/prefect/server/events/counting.py
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def get_interval_spans(
    self,
    start_datetime: DateTime,
    end_datetime: DateTime,
    interval: float,
):
    """Divide the given range of dates into evenly-sized spans of interval units"""
    self.validate_buckets(start_datetime, end_datetime, interval)

    # Our universe began on PIVOT_DATETIME and all time after that is
    # divided into `delta`-sized buckets. We want to find the bucket that
    # contains `start_datetime` and then find the all of the buckets
    # that come after it until the bucket that contains `end_datetime`.

    delta = self.as_timedelta(interval)
    start_in_utc = start_datetime.in_timezone("UTC")
    end_in_utc = end_datetime.in_timezone("UTC")

    if end_in_utc > pendulum.now("UTC"):
        end_in_utc = pendulum.now("UTC").end_of(self.value)

    first_span_index = math.floor((start_in_utc - PIVOT_DATETIME) / delta)

    yield first_span_index

    span_start = PIVOT_DATETIME + delta * first_span_index

    while span_start < end_in_utc:
        next_span_start = span_start + delta
        yield (span_start, next_span_start - timedelta(microseconds=1))
        span_start = next_span_start