Skip to content

prefect.records.filesystem

FileSystemRecordStore

Bases: RecordStore

A record store that stores data on the local filesystem.

Locking is implemented using a lock file with the same name as the record file, but with a .lock extension.

Attributes:

Name | Type | Description
records_directory | Path | the directory where records are stored; defaults to {PREFECT_HOME}/records

Source code in src/prefect/records/filesystem.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
class FileSystemRecordStore(RecordStore):
    """
    A record store that stores data on the local filesystem.

    Each record is written to a file named after its key inside
    `records_directory`. Locking is implemented using a lock file with the same
    name as the record file, but with a `.lock` extension. The lock file holds a
    JSON object with the `holder` and an optional `expiration` timestamp.

    Attributes:
        records_directory: the directory where records are stored.
            NOTE(review): documented as defaulting to `{PREFECT_HOME}/records`,
            but `__init__` takes it as a required argument -- confirm where the
            default is applied.
    """

    def __init__(self, records_directory: Path):
        self.records_directory = records_directory
        # In-memory cache of lock metadata keyed by record key. It can go stale
        # when another actor changes the lock file, which is why most checks
        # bypass it (see `is_locked`).
        self._locks: Dict[str, _LockInfo] = {}

    def _ensure_records_directory_exists(self) -> None:
        """Create the records directory (and parents) if it does not exist."""
        self.records_directory.mkdir(parents=True, exist_ok=True)

    def _lock_path_for_key(self, key: str) -> Path:
        """
        Return the path of the lock file for `key`.

        A cached lock entry's path is preferred; otherwise the path is derived
        from the record path.
        """
        if (lock_info := self._locks.get(key)) is not None:
            return lock_info["path"]
        # NOTE(review): `with_suffix` *replaces* an existing suffix, so keys that
        # differ only in their final extension (e.g. "a.json" and "a.txt") map
        # to the same lock file. Left unchanged to keep the on-disk layout
        # compatible with other actors.
        return self.records_directory.joinpath(key).with_suffix(".lock")

    def _get_lock_info(self, key: str, use_cache: bool = True) -> Optional[_LockInfo]:
        """
        Load the lock metadata for `key`.

        Args:
            key: the record key.
            use_cache: when True, return the in-memory entry if one exists
                instead of reading the lock file.

        Returns:
            The lock info (holder, expiration, path), or None if there is no
            lock file.
        """
        if use_cache and (cached := self._locks.get(key)) is not None:
            logger.debug(f"Got lock info for record with key {key} from cache")
            return cached

        lock_path = self._lock_path_for_key(key)

        try:
            with open(lock_path, "r") as lock_file:
                lock_info = json.load(lock_file)
        except FileNotFoundError:
            # The lock file is gone, so any cached entry is stale.
            self._locks.pop(key, None)
            return None

        lock_info["path"] = lock_path
        expiration = lock_info.get("expiration")
        lock_info["expiration"] = (
            pendulum.parse(expiration) if expiration is not None else None
        )
        self._locks[key] = lock_info
        logger.debug(f"Got lock info for record with key {key} from file")
        return lock_info

    def read(
        self, key: str, holder: Optional[str] = None
    ) -> Optional[TransactionRecord]:
        """
        Read the record stored under `key`.

        If the record is locked by a different holder, this blocks (without a
        timeout) until the lock is released before reading.

        Returns:
            The stored record, or None if no record file exists for `key`.
        """
        if not self.exists(key):
            return None

        holder = holder or self.generate_default_holder()

        if self.is_locked(key) and not self.is_lock_holder(key, holder):
            self.wait_for_lock(key)
        record_data = self.records_directory.joinpath(key).read_text()
        return TransactionRecord(
            key=key, result=BaseResult.model_validate_json(record_data)
        )

    def write(self, key: str, result: BaseResult, holder: Optional[str] = None) -> None:
        """
        Write `result` as the record for `key`, replacing any existing record.

        Raises:
            ValueError: if the record is locked by a different holder.
        """
        self._ensure_records_directory_exists()

        if self.is_locked(key) and not self.is_lock_holder(key, holder):
            raise ValueError(
                f"Cannot write to transaction with key {key} because it is locked by another holder."
            )

        # `write_text` creates the file when missing, so no separate touch.
        self.records_directory.joinpath(key).write_text(result.model_dump_json())

    def exists(self, key: str) -> bool:
        """Return True if a record file exists for `key`."""
        return self.records_directory.joinpath(key).exists()

    def supports_isolation_level(self, isolation_level: IsolationLevel) -> bool:
        """Return True for READ_COMMITTED and SERIALIZABLE isolation levels."""
        return isolation_level in {
            IsolationLevel.READ_COMMITTED,
            IsolationLevel.SERIALIZABLE,
        }

    def acquire_lock(
        self,
        key: str,
        holder: Optional[str] = None,
        acquire_timeout: Optional[float] = None,
        hold_timeout: Optional[float] = None,
    ) -> bool:
        """
        Acquire the lock for `key` on behalf of `holder`.

        If `holder` already holds the lock, the lock file is rewritten, which
        renews the expiration.

        Args:
            key: the record key to lock.
            holder: identifier of the lock holder; a default is generated when
                omitted.
            acquire_timeout: maximum seconds to wait for a lock held by another
                holder; None waits indefinitely, 0 does not wait.
            hold_timeout: seconds after which the lock expires; None means the
                lock never expires.

        Returns:
            True if the lock was acquired, False if waiting timed out.
        """
        holder = holder or self.generate_default_holder()

        self._ensure_records_directory_exists()
        lock_path = self._lock_path_for_key(key)

        if self.is_locked(key) and not self.is_lock_holder(key, holder):
            lock_free = self.wait_for_lock(key, acquire_timeout)
            if not lock_free:
                return False

        try:
            # Exclusive creation: fails if someone else created the lock file
            # between the check above and now.
            lock_path.touch(exist_ok=False)
        except FileExistsError:
            if not self.is_lock_holder(key, holder):
                logger.debug(
                    f"Another actor acquired the lock for record with key {key}. Trying again."
                )
                # NOTE: each retry restarts the full `acquire_timeout` window.
                return self.acquire_lock(key, holder, acquire_timeout, hold_timeout)
            # Otherwise we already hold the lock; fall through and renew it.

        expiration = (
            pendulum.now("utc") + pendulum.duration(seconds=hold_timeout)
            if hold_timeout is not None
            else None
        )

        with open(lock_path, "w") as lock_file:
            json.dump(
                {
                    "holder": holder,
                    "expiration": str(expiration) if expiration is not None else None,
                },
                lock_file,
            )

        self._locks[key] = {
            "holder": holder,
            "expiration": expiration,
            "path": lock_path,
        }

        return True

    def release_lock(self, key: str, holder: Optional[str] = None) -> None:
        """
        Release the lock for `key` held by `holder`.

        Raises:
            ValueError: if there is no active lock for `key`, or the lock is
                held by a different holder.
        """
        holder = holder or self.generate_default_holder()
        lock_path = self._lock_path_for_key(key)
        if not self.is_locked(key):
            # The original constructed this exception without raising it.
            raise ValueError(f"No lock for transaction with key {key}")
        if not self.is_lock_holder(key, holder):
            raise ValueError(f"No lock held by {holder} for transaction with key {key}")

        lock_path.unlink(missing_ok=True)
        self._locks.pop(key, None)

    def is_locked(self, key: str, use_cache: bool = False) -> bool:
        """
        Return True if `key` has an unexpired lock.

        An expired lock is cleaned up (lock file removed, cache entry dropped)
        as a side effect and reported as unlocked.

        Args:
            key: the record key.
            use_cache: when True, trust the in-memory lock entry if present
                instead of re-reading the lock file.
        """
        if (lock_info := self._get_lock_info(key, use_cache=use_cache)) is None:
            return False

        if (expiration := lock_info.get("expiration")) is None:
            return True

        if expiration < pendulum.now("utc"):
            # Another actor may already have removed the expired lock file.
            Path(lock_info["path"]).unlink(missing_ok=True)
            self._locks.pop(key, None)
            return False
        return True

    def is_lock_holder(self, key: str, holder: Optional[str] = None) -> bool:
        """
        Return True if `key` is currently locked and the lock belongs to
        `holder` (or to the generated default holder when `holder` is omitted).
        """
        # `is_locked` re-reads the lock file and refreshes the cache, so the
        # cached lookup below reflects the current on-disk state.
        if not self.is_locked(key):
            return False

        holder = holder or self.generate_default_holder()
        if (lock_info := self._get_lock_info(key)) is None:
            return False
        return lock_info["holder"] == holder

    def wait_for_lock(self, key: str, timeout: Optional[float] = None) -> bool:
        """
        Poll until the lock for `key` is released.

        Args:
            key: the record key.
            timeout: maximum seconds to wait; None waits indefinitely and 0
                returns immediately if the lock is still held.

        Returns:
            True if the lock is free, False if the timeout elapsed first.
        """
        # Track real elapsed time: `is_locked` does file I/O, so counting sleep
        # increments alone would under-estimate the time spent waiting.
        deadline = None if timeout is None else time.monotonic() + timeout
        while self.is_locked(key, use_cache=False):
            if deadline is not None and time.monotonic() >= deadline:
                return False
            time.sleep(0.1)
        return True