prefect.server.events.counting

`InvalidEventCountParameters`

Bases: ValueError

Raised when the given parameters are invalid for counting events.

Source code in src/prefect/server/events/counting.py

class InvalidEventCountParameters(ValueError):
    """Raised when the given parameters are invalid for counting events."""

    def __init__(self, message: str):
        super().__init__(message)
        self.message = message

`TimeUnit`

Bases: AutoEnum

Source code in src/prefect/server/events/counting.py

class TimeUnit(AutoEnum):
    week = AutoEnum.auto()
    day = AutoEnum.auto()
    hour = AutoEnum.auto()
    minute = AutoEnum.auto()
    second = AutoEnum.auto()

    def as_timedelta(self, interval) -> pendulum.Duration:
        if self == self.week:
            return pendulum.Duration(days=7 * interval)
        elif self == self.day:
            return pendulum.Duration(days=1 * interval)
        elif self == self.hour:
            return pendulum.Duration(hours=1 * interval)
        elif self == self.minute:
            return pendulum.Duration(minutes=1 * interval)
        elif self == self.second:
            return pendulum.Duration(seconds=1 * interval)
        else:
            raise NotImplementedError()

    def validate_buckets(
        self, start_datetime: DateTime, end_datetime: DateTime, interval: float
    ):
        MAX_ALLOWED_BUCKETS = 1000

        delta = self.as_timedelta(interval)
        start_in_utc = start_datetime.in_timezone("UTC")
        end_in_utc = end_datetime.in_timezone("UTC")

        if interval < 0.01:
            raise InvalidEventCountParameters("The minimum interval is 0.01")

        number_of_buckets = math.ceil((end_in_utc - start_in_utc) / delta)
        if number_of_buckets > MAX_ALLOWED_BUCKETS:
            raise InvalidEventCountParameters(
                f"The given interval would create {number_of_buckets} buckets, "
                "which is too many. Please increase the interval or reduce the "
                f"time range to produce {MAX_ALLOWED_BUCKETS} buckets or fewer."
            )

    def get_interval_spans(
        self,
        start_datetime: DateTime,
        end_datetime: DateTime,
        interval: float,
    ):
        """Divide the given range of dates into evenly-sized spans of interval units"""
        self.validate_buckets(start_datetime, end_datetime, interval)

        # Our universe began on PIVOT_DATETIME and all time after that is
        # divided into `delta`-sized buckets. We want to find the bucket that
        # contains `start_datetime` and then find the all of the buckets
        # that come after it until the bucket that contains `end_datetime`.

        delta = self.as_timedelta(interval)
        start_in_utc = start_datetime.in_timezone("UTC")
        end_in_utc = end_datetime.in_timezone("UTC")

        if end_in_utc > pendulum.now("UTC"):
            end_in_utc = pendulum.now("UTC").end_of(self.value)

        first_span_index = math.floor((start_in_utc - PIVOT_DATETIME) / delta)

        yield first_span_index

        span_start = PIVOT_DATETIME + delta * first_span_index

        while span_start < end_in_utc:
            next_span_start = span_start + delta
            yield (span_start, next_span_start - timedelta(microseconds=1))
            span_start = next_span_start

    def database_value_expression(self, time_interval: float):
        """Returns the SQL expression to place an event in a time bucket"""
        # The date_bin function can do the bucketing for us:
        # https://www.postgresql.org/docs/14/functions-datetime.html#FUNCTIONS-DATETIME-BIN
        db = provide_database_interface()
        delta = self.as_timedelta(time_interval)
        if db.dialect.name == "postgresql":
            return sa.cast(
                sa.func.floor(
                    sa.extract(
                        "epoch",
                        (
                            sa.func.date_bin(delta, db.Event.occurred, PIVOT_DATETIME)
                            - PIVOT_DATETIME
                        ),
                    )
                    / delta.total_seconds(),
                ),
                sa.Text,
            )
        elif db.dialect.name == "sqlite":
            # Convert pivot date and event date to strings formatted as seconds since the epoch
            pivot_timestamp = sa.func.strftime(
                "%s", PIVOT_DATETIME.strftime("%Y-%m-%d %H:%M:%S")
            )
            event_timestamp = sa.func.strftime("%s", db.Event.occurred)
            seconds_since_pivot = event_timestamp - pivot_timestamp
            # Calculate the bucket index by dividing by the interval in seconds and flooring the result
            bucket_index = sa.func.floor(
                sa.cast(seconds_since_pivot, sa.Integer) / delta.total_seconds()
            )
            return sa.cast(bucket_index, sa.Text)
        else:
            raise NotImplementedError(f"Dialect {db.dialect.name} is not supported.")

    def database_label_expression(self, db: PrefectDBInterface, time_interval: float):
        """Returns the SQL expression to label a time bucket"""
        time_delta = self.as_timedelta(time_interval)
        if db.dialect.name == "postgresql":
            # The date_bin function can do the bucketing for us:
            # https://www.postgresql.org/docs/14/functions-datetime.html#FUNCTIONS-DATETIME-BIN
            return sa.func.to_char(
                sa.func.date_bin(time_delta, db.Event.occurred, PIVOT_DATETIME),
                'YYYY-MM-DD"T"HH24:MI:SSTZH:TZM',
            )
        elif db.dialect.name == "sqlite":
            # We can't use date_bin in SQLite, so we have to do the bucketing manually
            seconds_since_epoch = sa.func.strftime("%s", db.Event.occurred)
            # Convert the total seconds of the timedelta to a constant in SQL
            bucket_size = time_delta.total_seconds()
            # Perform integer division and multiplication to find the bucket start epoch using SQL functions
            bucket_start_epoch = sa.func.cast(
                (sa.cast(seconds_since_epoch, sa.Integer) / bucket_size) * bucket_size,
                sa.Integer,
            )
            bucket_datetime = sa.func.strftime(
                "%Y-%m-%dT%H:%M:%SZ", sa.func.datetime(bucket_start_epoch, "unixepoch")
            )
            return bucket_datetime
        else:
            raise NotImplementedError(f"Dialect {db.dialect.name} is not supported.")

`database_label_expression(db, time_interval)`

Returns the SQL expression to label a time bucket

Source code in src/prefect/server/events/counting.py

def database_label_expression(self, db: PrefectDBInterface, time_interval: float):
    """Returns the SQL expression to label a time bucket"""
    time_delta = self.as_timedelta(time_interval)
    if db.dialect.name == "postgresql":
        # The date_bin function can do the bucketing for us:
        # https://www.postgresql.org/docs/14/functions-datetime.html#FUNCTIONS-DATETIME-BIN
        return sa.func.to_char(
            sa.func.date_bin(time_delta, db.Event.occurred, PIVOT_DATETIME),
            'YYYY-MM-DD"T"HH24:MI:SSTZH:TZM',
        )
    elif db.dialect.name == "sqlite":
        # We can't use date_bin in SQLite, so we have to do the bucketing manually
        seconds_since_epoch = sa.func.strftime("%s", db.Event.occurred)
        # Convert the total seconds of the timedelta to a constant in SQL
        bucket_size = time_delta.total_seconds()
        # Perform integer division and multiplication to find the bucket start epoch using SQL functions
        bucket_start_epoch = sa.func.cast(
            (sa.cast(seconds_since_epoch, sa.Integer) / bucket_size) * bucket_size,
            sa.Integer,
        )
        bucket_datetime = sa.func.strftime(
            "%Y-%m-%dT%H:%M:%SZ", sa.func.datetime(bucket_start_epoch, "unixepoch")
        )
        return bucket_datetime
    else:
        raise NotImplementedError(f"Dialect {db.dialect.name} is not supported.")

`database_value_expression(time_interval)`

Returns the SQL expression to place an event in a time bucket

Source code in src/prefect/server/events/counting.py

def database_value_expression(self, time_interval: float):
    """Returns the SQL expression to place an event in a time bucket"""
    # The date_bin function can do the bucketing for us:
    # https://www.postgresql.org/docs/14/functions-datetime.html#FUNCTIONS-DATETIME-BIN
    db = provide_database_interface()
    delta = self.as_timedelta(time_interval)
    if db.dialect.name == "postgresql":
        return sa.cast(
            sa.func.floor(
                sa.extract(
                    "epoch",
                    (
                        sa.func.date_bin(delta, db.Event.occurred, PIVOT_DATETIME)
                        - PIVOT_DATETIME
                    ),
                )
                / delta.total_seconds(),
            ),
            sa.Text,
        )
    elif db.dialect.name == "sqlite":
        # Convert pivot date and event date to strings formatted as seconds since the epoch
        pivot_timestamp = sa.func.strftime(
            "%s", PIVOT_DATETIME.strftime("%Y-%m-%d %H:%M:%S")
        )
        event_timestamp = sa.func.strftime("%s", db.Event.occurred)
        seconds_since_pivot = event_timestamp - pivot_timestamp
        # Calculate the bucket index by dividing by the interval in seconds and flooring the result
        bucket_index = sa.func.floor(
            sa.cast(seconds_since_pivot, sa.Integer) / delta.total_seconds()
        )
        return sa.cast(bucket_index, sa.Text)
    else:
        raise NotImplementedError(f"Dialect {db.dialect.name} is not supported.")

`get_interval_spans(start_datetime, end_datetime, interval)`

Divide the given range of dates into evenly-sized spans of interval units

Source code in src/prefect/server/events/counting.py

def get_interval_spans(
    self,
    start_datetime: DateTime,
    end_datetime: DateTime,
    interval: float,
):
    """Divide the given range of dates into evenly-sized spans of interval units"""
    self.validate_buckets(start_datetime, end_datetime, interval)

    # Our universe began on PIVOT_DATETIME and all time after that is
    # divided into `delta`-sized buckets. We want to find the bucket that
    # contains `start_datetime` and then find the all of the buckets
    # that come after it until the bucket that contains `end_datetime`.

    delta = self.as_timedelta(interval)
    start_in_utc = start_datetime.in_timezone("UTC")
    end_in_utc = end_datetime.in_timezone("UTC")

    if end_in_utc > pendulum.now("UTC"):
        end_in_utc = pendulum.now("UTC").end_of(self.value)

    first_span_index = math.floor((start_in_utc - PIVOT_DATETIME) / delta)

    yield first_span_index

    span_start = PIVOT_DATETIME + delta * first_span_index

    while span_start < end_in_utc:
        next_span_start = span_start + delta
        yield (span_start, next_span_start - timedelta(microseconds=1))
        span_start = next_span_start