diff --git a/glowtables/sparql.py b/glowtables/sparql.py index 5662b83..0724fd3 100644 --- a/glowtables/sparql.py +++ b/glowtables/sparql.py @@ -15,6 +15,7 @@ """Abstraction over SPARQL backends, primarily meant to be mocked by tests.""" import abc +import urllib.parse from typing import Iterable import requests @@ -41,8 +42,15 @@ class RemoteSparqlBackend(SparqlBackend): self._session.headers["User-Agent"] = agent def query(self, query: str) -> Iterable[tuple]: - params = {"format": "json", "query": query} - resp = self._session.post(self._url, params=params).json() + # The body below is form-encoded ("query=..."), so declare it as a form. + headers = { + "Content-Type": "application/x-www-form-urlencoded", + "Accept": "application/json", + } + params = {"query": query} + resp = self._session.post( + self._url, headers=headers, data=urllib.parse.urlencode(params) + ).json() variables = resp["head"]["vars"] for result in resp["results"]["bindings"]: yield tuple(result.get(variable) for variable in variables) diff --git a/glowtables/table.py b/glowtables/table.py index e0d800e..f133688 100644 --- a/glowtables/table.py +++ b/glowtables/table.py @@ -78,6 +78,10 @@ class LiteralField(Field[rdflib.Literal]): predicate: rdflib.URIRef default: Optional[rdflib.Literal] = None + """If this is not :const:`None`, allows subjects without a statement for this field, + and uses this value instead when sorting. + + This is only used when sorting, and isn't displayed.""" def sort_key(self, value: rdflib.Literal) -> Any: """Function suitable as ``key`` argument to :func:`sorted`.
diff --git a/glowtables/tests/conftest.py b/glowtables/tests/conftest.py index 7be2bf5..dc6f69b 100644 --- a/glowtables/tests/conftest.py +++ b/glowtables/tests/conftest.py @@ -16,6 +16,8 @@ # pylint: disable=redefined-outer-name +import urllib.parse + import pytest import rdflib @@ -33,11 +35,15 @@ def rdflib_sparql(requests_mock, rdflib_graph: rdflib.Graph) -> RemoteSparqlBack """Returns a SPARQL backend instance for ``rdflib_graph``.""" def json_callback(request, context): - result = rdflib_graph.query(request.json()) + params = urllib.parse.parse_qs(request.text) + (query,) = params["query"] + results = rdflib_graph.query(query) context.status_code = 200 return { - "head": {"vars": result.vars}, - "results": list(result), + "head": {"vars": results.vars}, + "results": { + "bindings": [dict(zip(results.vars, result)) for result in results] + }, } requests_mock.register_uri("POST", "mock://sparql.example.org/", json=json_callback) diff --git a/glowtables/tests/table_test.py b/glowtables/tests/table_test.py index 1a5b2f5..d592150 100644 --- a/glowtables/tests/table_test.py +++ b/glowtables/tests/table_test.py @@ -12,6 +12,11 @@ # You should have received a copy of the GNU Affero General Public License along with # this program. If not, see <https://www.gnu.org/licenses/>. +"""Tests :mod:`glowtables.table`. + +Test cases use a graph containing a few CPUs, with names and clock frequencies.
+""" + import textwrap import pytest @@ -20,6 +25,40 @@ import rdflib from glowtables.sparql import SparqlBackend from glowtables.table import Language, LiteralField, Table +CPU1_URI = rdflib.URIRef("http://example.org/grown-1700") +CPU2_URI = rdflib.URIRef("http://example.org/grown-3600") +CPU3_URI = rdflib.URIRef("http://example.org/secret-project") +CG_URI = rdflib.URIRef("http://example.org/likestone-underdog") +CPU = rdflib.URIRef("http://example.org/CPU") +DISPLAY_NAME = rdflib.URIRef("http://example.org/display-name") +FREQUENCY = rdflib.URIRef("http://example.org/clock-frequency") +TYPE = rdflib.URIRef("http://example.org/type") + + +@pytest.fixture() +def rdflib_graph() -> rdflib.Graph: + graph = rdflib.Graph() + + # A CPU with all the properties + graph.add((CPU1_URI, DISPLAY_NAME, rdflib.Literal("Grown 1700"))) + graph.add((CPU1_URI, TYPE, CPU)) + graph.add((CPU1_URI, FREQUENCY, rdflib.Literal(3000))) + + # Another CPU, without a frequency + graph.add((CPU2_URI, DISPLAY_NAME, rdflib.Literal("Grown 3600"))) + graph.add((CPU2_URI, TYPE, CPU)) + + # Another CPU, without a name + graph.add((CPU3_URI, TYPE, CPU)) + graph.add((CPU3_URI, FREQUENCY, rdflib.Literal(9000))) + + # A graphics card, which is not typed as a CPU and should be excluded from CPU searches + graph.add((CG_URI, DISPLAY_NAME, rdflib.Literal("LikeStone Underdog"))) + graph.add((CG_URI, FREQUENCY, rdflib.Literal(1626))) + graph.add((CG_URI, TYPE, rdflib.URIRef("http://example.org/graphics-card"))) + + return graph + def test_single_literal(rdflib_sparql: SparqlBackend) -> None: name_field = LiteralField( @@ -42,7 +81,10 @@ def test_single_literal(rdflib_sparql: SparqlBackend) -> None: """ ) - rdflib_sparql.query(table.sparql()) + assert set(rdflib_sparql.query(table.sparql())) == { + ("Grown 1700",), + ("Grown 3600",), + } def test_two_literals(rdflib_sparql: SparqlBackend) -> None: @@ -72,7 +114,9 @@ def test_two_literals(rdflib_sparql: SparqlBackend) -> None: """ ) - rdflib_sparql.query(table.sparql()) + assert
set(rdflib_sparql.query(table.sparql())) == { + ("Grown 1700", "3000"), + } def test_default_value(rdflib_sparql: SparqlBackend) -> None: @@ -82,22 +126,33 @@ def test_default_value(rdflib_sparql: SparqlBackend) -> None: rdflib.URIRef("http://example.org/display-name"), default=rdflib.Literal("Anonymous CPU"), ) + frequency_field = LiteralField( + "frequency", + {Language("en"): "Clock frequency"}, + rdflib.URIRef("http://example.org/clock-frequency"), + default=rdflib.Literal(0), + ) table = Table( - fields=[name_field], + fields=[name_field, frequency_field], constraints="?subject <http://example.org/type> <http://example.org/CPU>.", ) assert table.sparql() == textwrap.dedent( """ - SELECT ?display_name + SELECT ?display_name ?frequency WHERE { ?subject <http://example.org/type> <http://example.org/CPU>. OPTIONAL { ?subject <http://example.org/display-name> ?display_name. }. + OPTIONAL { ?subject <http://example.org/clock-frequency> ?frequency. }. } """ ) - rdflib_sparql.query(table.sparql()) + assert set(rdflib_sparql.query(table.sparql())) == { + ("Grown 1700", "3000"), + ("Grown 3600", None), + (None, "9000"), + } def test_field_id_subject() -> None: