glowtables/glowtables/sparql.py

62 lines
2.1 KiB
Python

# This file is part of the Glowtables software
# Copyright (C) 2023 Valentin Lorentz
#
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU Affero General Public License version 3, as published by the
# Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.
"""Abstraction over SPARQL backends, primarily meant to be mocked by tests."""
import abc
import json
from typing import Iterable
import requests
from .cache import Cache
class SparqlBackend(abc.ABC):
    """Interface that every SPARQL client implementation must provide."""

    @abc.abstractmethod
    def query(self, query: str) -> Iterable[tuple]:
        """Run the SPARQL query given as text and return an iterable of result tuples."""
class RemoteSparqlBackend(SparqlBackend):
    """Queries a SPARQL API over HTTP.

    Raw response bodies are looked up in, and saved to, the cache passed to the
    constructor, keyed by the endpoint URL and the query text.
    """

    def __init__(self, url: str, agent: str, cache: Cache):
        """
        :param url: Base URL of the endpoint
        :param agent: User-Agent to use in HTTP requests
        :param cache: Cache used to look up and store raw response bodies
        """
        self._url = url
        self._session = requests.Session()
        self._session.headers["User-Agent"] = agent
        self._cache = cache

    def query(self, query: str) -> Iterable[tuple]:
        """Sends a SPARQL query, and yields one tuple per result row.

        This is a generator: the cache lookup and the HTTP request only happen
        once the caller starts iterating.

        :param query: Text of the SPARQL query
        :returns: tuples with one item per variable listed in the response head,
            in that order; an item is the binding taken as-is from the decoded
            JSON, or ``None`` when the row has no binding for that variable
        :raises requests.HTTPError: if the endpoint answers with an error status
        """
        resp_text = self._cache.get(self._url, query)
        # A falsy value (including an empty cached body) is treated as a miss.
        if not resp_text:
            headers = {
                "Content-Type": "application/sparql-query",
                "Accept": "application/json",
            }
            response = self._session.post(
                self._url,
                headers=headers,
                # Encode explicitly: the SPARQL protocol requires UTF-8 for
                # directly-POSTed queries, and leaving a str to the HTTP stack
                # may pick a different encoding for non-ASCII characters.
                data=query.encode("utf-8"),
            )
            # Fail before caching, so an error page is never stored and then
            # replayed for every later call with the same query.
            response.raise_for_status()
            resp_text = response.text
            self._cache.set(self._url, query, resp_text)

        resp = json.loads(resp_text)
        variables = resp["head"]["vars"]
        for result in resp["results"]["bindings"]:
            yield tuple(result.get(variable) for variable in variables)