glowtables/glowtables/cache.py

104 lines
3.4 KiB
Python

# This file is part of the Glowtables software
# Copyright (C) 2023 Valentin Lorentz
#
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU Affero General Public License version 3, as published by the
# Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.
"""SPARQL query cache"""
import datetime
import random
import sqlite3
from typing import Optional
# Maximum age of a cached response: lookups ignore rows older than this, and the
# occasional cleanup pass deletes them.
CACHE_LIFETIME = datetime.timedelta(weeks=1)

EXPIRE_PROBA = 1e-3
"""Probability an ``INSERT INTO`` is preceded by a ``DELETE`` of all old records."""
def _now() -> datetime.datetime:
return datetime.datetime.now(tz=datetime.timezone.utc)
class Cache:
    """A simple key-value cache for SPARQL queries.

    Responses are stored in an SQLite table keyed by ``(url, query)``, along with the
    UTC timestamp at which they were recorded. Entries older than ``CACHE_LIFETIME``
    are ignored by :meth:`get` and are occasionally purged by :meth:`set`.
    """

    def __init__(self, db: str):
        """Open (or create) the cache database and make sure its schema exists.

        :param db: database location, passed as-is to :func:`sqlite3.connect`
        """
        self._db = sqlite3.connect(db)
        self._init_schema()

    def _init_schema(self) -> None:
        """Initialize tables and indexes"""
        with self._db:
            self._db.execute(
                """
                CREATE TABLE IF NOT EXISTS sparql_queries (
                    url TEXT,
                    query TEXT,
                    response TEXT,
                    date TEXT -- ISO8601 timestamp of the recorded query, must be UTC
                );
                """
            )
            # The unique index is what makes the ``ON CONFLICT(url, query)`` upsert
            # in :meth:`set` possible, and guarantees at most one row per key.
            self._db.execute(
                """
                CREATE UNIQUE INDEX IF NOT EXISTS sparql_queries_pk
                ON sparql_queries (url, query)
                """
            )

    def _expire(self) -> None:
        """Randomly delete outdated items from the database.

        With probability ``EXPIRE_PROBA``, removes every row recorded more than
        ``CACHE_LIFETIME`` ago; otherwise does nothing.
        """
        if random.random() >= EXPIRE_PROBA:
            return

        cutoff = (_now() - CACHE_LIFETIME).isoformat()
        with self._db:
            self._db.execute(
                """
                DELETE FROM sparql_queries WHERE date < ?
                """,
                # Must be a 1-tuple: a bare string would be treated by sqlite3 as a
                # sequence of one-character bindings and rejected.
                (cutoff,),
            )

    def get(self, url: str, query: str) -> Optional[str]:
        """Gets the response to a previous query from the cache, or None.

        :param url: URL of the SPARQL endpoint the query was sent to
        :param query: text of the SPARQL query
        :returns: the cached response if one was recorded less than
            ``CACHE_LIFETIME`` ago, ``None`` otherwise
        """
        cutoff = (_now() - CACHE_LIFETIME).isoformat()
        with self._db:
            cur = self._db.execute(
                """
                SELECT response
                FROM sparql_queries
                WHERE url=? AND query=? AND date >= ?
                """,
                (url, query, cutoff),
            )
            # The unique index on (url, query) means there is at most one match.
            row = cur.fetchone()

        if row is None:
            # cache miss
            return None

        # cache hit
        (resp,) = row
        return resp

    def set(self, url: str, query: str, response: str) -> None:
        """Adds the response of a query to the cache.

        If a response is already recorded for ``(url, query)``, it is replaced and
        its timestamp is refreshed.

        :param url: URL of the SPARQL endpoint the query was sent to
        :param query: text of the SPARQL query
        :param response: response body to remember
        """
        self._expire()
        with self._db:
            self._db.execute(
                """
                INSERT INTO sparql_queries(url, query, response, date)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(url, query) DO UPDATE SET
                    response=EXCLUDED.response,
                    date=EXCLUDED.date
                """,
                (url, query, response, _now().isoformat()),
            )