104 lines
3.4 KiB
Python
104 lines
3.4 KiB
Python
# This file is part of the Glowtables software
|
|
# Copyright (C) 2023 Valentin Lorentz
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify it under the
|
|
# terms of the GNU Affero General Public License version 3, as published by the
|
|
# Free Software Foundation.
|
|
#
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
|
# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License along with
|
|
# this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
"""SPARQL query cache"""
|
|
|
|
import datetime
|
|
import random
|
|
import sqlite3
|
|
from typing import Optional
|
|
|
|
EXPIRE_PROBA = 0.001
"""Probability an ``INSERT INTO`` is preceded by a ``DELETE`` of all old records."""

CACHE_LIFETIME = datetime.timedelta(days=7)
"""Maximum age of a cached response; older rows are ignored and eventually deleted."""
|
|
|
|
|
|
def _now() -> datetime.datetime:
    """Return the current time as a timezone-aware UTC datetime.

    Cache timestamps are stored as ISO 8601 text and compared as strings in
    SQL, so every timestamp must carry the same (UTC) offset.
    """
    return datetime.datetime.now(tz=datetime.timezone.utc)
|
|
|
|
|
|
class Cache:
    """A simple key-value cache for SPARQL queries.

    Responses are stored in a SQLite table keyed by ``(url, query)``. Rows
    older than ``CACHE_LIFETIME`` are treated as missing by :meth:`get` and
    are occasionally purged when :meth:`set` is called.
    """

    def __init__(self, db: str):
        """Open the SQLite database ``db`` and make sure the schema exists.

        ``db`` is passed unchanged to :func:`sqlite3.connect`.
        """
        self._db = sqlite3.connect(db)
        self._init_schema()

    def _init_schema(self) -> None:
        """Initialize tables and indexes"""
        with self._db:
            self._db.execute(
                """
                CREATE TABLE IF NOT EXISTS sparql_queries (
                    url TEXT,
                    query TEXT,
                    response TEXT,
                    date TEXT -- ISO8601 timestamp of the recorded query, must be UTC
                );
                """
            )
            # The unique index is what ``ON CONFLICT(url, query)`` in set()
            # relies on to turn a duplicate insert into an update.
            self._db.execute(
                """
                CREATE UNIQUE INDEX IF NOT EXISTS sparql_queries_pk
                ON sparql_queries (url, query)
                """
            )

    def _expire(self) -> None:
        """Randomly delete outdated items from the database.

        With probability ``EXPIRE_PROBA``, deletes every row whose timestamp
        is older than ``CACHE_LIFETIME``; otherwise does nothing.
        """
        if random.random() < EXPIRE_PROBA:
            with self._db:
                self._db.execute(
                    """
                    DELETE FROM sparql_queries WHERE date < ?
                    """,
                    # Must be a one-element tuple: a bare string is itself a
                    # sequence of characters, so sqlite3 would reject it for
                    # supplying the wrong number of bindings.
                    ((_now() - CACHE_LIFETIME).isoformat(),),
                )

    def get(self, url: str, query: str) -> Optional[str]:
        """Gets the response to a previous query from the cache, or None.

        Entries recorded more than ``CACHE_LIFETIME`` ago count as misses.
        """
        with self._db:
            cur = self._db.execute(
                """
                SELECT response
                FROM sparql_queries
                WHERE url=? AND query=? AND date >= ?
                """,
                (url, query, (_now() - CACHE_LIFETIME).isoformat()),
            )
            # The unique index on (url, query) guarantees at most one match.
            row = cur.fetchone()

        if row is None:
            # cache miss
            return None

        # cache hit
        return row[0]

    def set(self, url: str, query: str, response: str) -> None:
        """Adds the response of a query to the cache.

        An existing entry for the same ``(url, query)`` is overwritten and its
        timestamp refreshed. May first purge outdated rows (see ``_expire``).
        """
        self._expire()
        with self._db:
            self._db.execute(
                """
                INSERT INTO sparql_queries(url, query, response, date)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(url, query) DO UPDATE SET
                    response=EXCLUDED.response,
                    date=EXCLUDED.date
                """,
                (url, query, response, _now().isoformat()),
            )
|