# This file is part of the Glowtables software
# Copyright (C) 2023  Valentin Lorentz
#
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU Affero General Public License version 3, as published by the
# Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.  See the GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License along with
# this program.  If not, see <https://www.gnu.org/licenses/>.

"""SPARQL query cache"""

import datetime
import random
import sqlite3
from typing import Optional

EXPIRE_PROBA = 0.001
"""Probability an ``INSERT INTO`` is preceded by a ``DELETE`` of all old records."""

CACHE_LIFETIME = datetime.timedelta(days=7)


def _now() -> datetime.datetime:
    """Returns the current time as a timezone-aware UTC datetime."""
    return datetime.datetime.now(tz=datetime.timezone.utc)


def _expiry_cutoff() -> str:
    """Returns the ISO8601 timestamp before which cached records are outdated."""
    return (_now() - CACHE_LIFETIME).isoformat()


class Cache:
    """A simple key-value cache for SPARQL queries

    Records are keyed by ``(url, query)`` and stored in a SQLite database together
    with the UTC timestamp at which they were written. Records older than
    :data:`CACHE_LIFETIME` are ignored by :meth:`get`, and are occasionally purged
    by :meth:`set`.
    """

    def __init__(self, db: str) -> None:
        """Opens (creating it if needed) the SQLite database at ``db``.

        ``db`` is passed as-is to :func:`sqlite3.connect`, so ``":memory:"`` gives
        a private in-memory cache.
        """
        self._db = sqlite3.connect(db)
        self._init_schema()

    def _init_schema(self) -> None:
        """Initialize tables and indexes"""
        with self._db:
            self._db.execute(
                """
                CREATE TABLE IF NOT EXISTS sparql_queries (
                    url TEXT,
                    query TEXT,
                    response TEXT,
                    date TEXT -- ISO8601 timestamp of the recorded query, must be UTC
                );
                """
            )
            # The unique index is what makes the ``ON CONFLICT(url, query)`` upsert
            # in :meth:`set` possible.
            self._db.execute(
                """
                CREATE UNIQUE INDEX IF NOT EXISTS sparql_queries_pk
                ON sparql_queries (url, query)
                """
            )

    def _expire(self) -> None:
        """Randomly delete outdated items from the database.

        With probability :data:`EXPIRE_PROBA`, deletes every record older than
        :data:`CACHE_LIFETIME`; otherwise does nothing.
        """
        if random.random() < EXPIRE_PROBA:
            with self._db:
                self._db.execute(
                    """
                    DELETE FROM sparql_queries WHERE date < ?
                    """,
                    # Must be a 1-tuple: a bare string would be interpreted as a
                    # sequence of one-character parameters and rejected by sqlite3.
                    (_expiry_cutoff(),),
                )

    def get(self, url: str, query: str) -> Optional[str]:
        """Gets the response to a previous query from the cache, or None.

        Records older than :data:`CACHE_LIFETIME` are treated as missing, even if
        they have not been purged from the database yet.
        """
        with self._db:
            cur = self._db.execute(
                """
                SELECT response
                FROM sparql_queries
                WHERE url=? AND query=? AND date >= ?
                """,
                (url, query, _expiry_cutoff()),
            )
            # The unique index on (url, query) guarantees at most one row.
            row = cur.fetchone()

        if row is None:
            # cache miss
            return None

        # cache hit
        (resp,) = row
        return resp

    def set(self, url: str, query: str, response: str) -> None:
        """Adds the response of a query to the cache.

        If a response is already recorded for ``(url, query)``, it is replaced and
        its timestamp is refreshed.
        """
        self._expire()

        with self._db:
            self._db.execute(
                """
                INSERT INTO sparql_queries(url, query, response, date)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(url, query) DO UPDATE SET
                    response=EXCLUDED.response,
                    date=EXCLUDED.date
                """,
                (url, query, response, _now().isoformat()),
            )