glowtables/glowtables/sparql.py

# This file is part of the Glowtables software
# Copyright (C) 2023  Valentin Lorentz
#
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU Affero General Public License version 3, as published by the
# Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.  See the GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License along with
# this program.  If not, see <http://www.gnu.org/licenses/>.

"""Abstraction over SPARQL backends, primarily meant to be mocked by tests."""

import abc
import json
from typing import Iterable

import requests

from .cache import Cache


class SparqlBackend(abc.ABC):
    """Interface every SPARQL client implements.

    Concrete backends only need to provide :meth:`query`; the class cannot be
    instantiated until they do.
    """

    @abc.abstractmethod
    def query(self, query: str) -> Iterable[tuple]:
        """Runs the given SPARQL query and returns its results.

        :param query: Text of the SPARQL query to send
        :returns: An iterable of tuples, one per result
        """


class RemoteSparqlBackend(SparqlBackend):
    """Queries a SPARQL API over HTTP.

    Raw response bodies are read from and written to the cache given at
    construction time, keyed by endpoint URL and query text.
    """

    def __init__(self, url: str, agent: str, cache: Cache):
        """
        :param url: Base URL of the endpoint
        :param agent: User-Agent to use in HTTP requests
        :param cache: Cache consulted before each HTTP request, and filled with
            the response text after a successful one
        """
        self._url = url
        self._session = requests.Session()
        self._session.headers["User-Agent"] = agent
        self._cache = cache

    def query(self, query: str) -> Iterable[tuple]:
        """Sends a SPARQL query, and yields one tuple per result binding.

        This is a generator: neither the cache lookup nor the HTTP request
        happens until the returned iterator is first advanced.

        :param query: Text of the SPARQL query
        :returns: Tuples holding, for each variable listed in ``head.vars``
            (in that order), the JSON-decoded binding of that variable, or
            ``None`` when the result does not bind it
        :raises requests.HTTPError: if the endpoint answers with an HTTP error
            status; such responses are not written to the cache
        """
        headers = {
            "Content-Type": "application/sparql-query",
            "Accept": "application/json",
        }

        # Any falsy cache value (missing or empty entry) triggers a fetch.
        resp_text = self._cache.get(self._url, query)
        if not resp_text:
            # Encode explicitly: a ``str`` body may be encoded as ISO-8859-1 by
            # the underlying HTTP stack, which breaks on queries containing
            # characters outside that charset.
            response = self._session.post(
                self._url, headers=headers, data=query.encode("utf-8")
            )
            # Fail before caching, so an error page never poisons the cache.
            response.raise_for_status()
            resp_text = response.text
            self._cache.set(self._url, query, resp_text)

        parsed = json.loads(resp_text)
        variables = parsed["head"]["vars"]
        for binding in parsed["results"]["bindings"]:
            yield tuple(binding.get(variable) for variable in variables)