From a996e1b4a6ff53747b5e2520d34223678d1c19ea Mon Sep 17 00:00:00 2001 From: Val Lorentz Date: Sun, 28 May 2023 21:09:59 +0200 Subject: [PATCH] Add demo web app and test with actual data --- .gitignore | 3 + glowtables/examples/__init__.py | 0 glowtables/examples/cats.py | 58 ++++++++++++ glowtables/shortxml.py | 128 ++++++++++++++++++++++++++ glowtables/sparql.py | 6 +- glowtables/style.css | 9 ++ glowtables/table.py | 46 +++++++++- glowtables/tests/conftest.py | 23 +++-- glowtables/tests/table_test.py | 5 + glowtables/views.py | 158 ++++++++++++++++++++++++++++++++ pyproject.toml | 10 +- 11 files changed, 430 insertions(+), 16 deletions(-) create mode 100644 glowtables/examples/__init__.py create mode 100644 glowtables/examples/cats.py create mode 100644 glowtables/shortxml.py create mode 100644 glowtables/style.css create mode 100644 glowtables/views.py diff --git a/.gitignore b/.gitignore index 5d381cc..64de546 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,6 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +# Date +*.sqlite3 diff --git a/glowtables/examples/__init__.py b/glowtables/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/glowtables/examples/cats.py b/glowtables/examples/cats.py new file mode 100644 index 0000000..d8b179e --- /dev/null +++ b/glowtables/examples/cats.py @@ -0,0 +1,58 @@ +# This file is part of the Glowtables software +# Copyright (C) 2023 Valentin Lorentz +# +# This program is free software: you can redistribute it and/or modify it under the +# terms of the GNU Affero General Public License version 3, as published by the +# Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License along with +# this program. If not, see <https://www.gnu.org/licenses/>. + +"""Example table of all Wikidata cats""" + +import rdflib + +from glowtables.table import LabeledField, Language, Table + +cats = Table( + id="cats", + display_names={Language("en"): "Cats"}, + fields=[ + LabeledField( + "name", + {Language("en"): "Name property (not label)"}, + str, + rdflib.URIRef("http://www.wikidata.org/prop/direct/P2561"), + default="", + ), + LabeledField( + "breed", + {Language("en"): "Breed"}, + str, + rdflib.URIRef("http://www.wikidata.org/prop/direct/P4743"), + ), + LabeledField( + "instanceof", + {Language("en"): "Instance of"}, + str, + rdflib.URIRef("http://www.wikidata.org/prop/direct/P31"), + ), + LabeledField( + "haircolor", + {Language("en"): "Hair color"}, + str, + rdflib.URIRef("http://www.wikidata.org/prop/direct/P1884"), + default="", + ), + ], + constraints=""" + ?subject + <http://www.wikidata.org/prop/direct/P31> + <http://www.wikidata.org/entity/Q146> + . + """, # instance of cat +) diff --git a/glowtables/shortxml.py b/glowtables/shortxml.py new file mode 100644 index 0000000..9fd1790 --- /dev/null +++ b/glowtables/shortxml.py @@ -0,0 +1,128 @@ +# Copyright (c) 2023 Valentin Lorentz +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# pylint: disable=consider-using-f-string,missing-class-docstring + +"""This module allows writing XML ASTs in a way that is more concise than the default +:mod:`xml.etree.ElementTree` interface. + +For example: + +.. code-block:: python + + from .shortxml import Namespace + + HTML = Namespace("http://www.w3.org/1999/xhtml") + + page = HTML.html( + HTML.head( + HTML.title("irctest dashboard"), + HTML.link(rel="stylesheet", type="text/css", href="./style.css"), + ), + HTML.body( + HTML.h1("irctest dashboard"), + HTML.h2("Tests by command/specification"), + HTML.dl( + [ + ( # elements can be arbitrarily nested in lists + HTML.dt(HTML.a(title, href=f"./{title}.xhtml")), + HTML.dd(definition), + ) + for title, definition in sorted(definitions) + ], + class_="module-index", + ), + HTML.h2("Tests by implementation"), + HTML.ul( + [ + HTML.li(HTML.a(job, href=f"./{file_name}")) + for job, file_name in sorted(job_pages) + ], + class_="job-index", + ), + ), + ) + + print(ET.tostring(page, default_namespace=HTML.uri)) + + +Attributes can be passed either as dictionaries or as kwargs, and can be mixed +with child elements. +Trailing underscores are stripped from attributes, which allows passing reserved +Python keywords (eg. ``class_`` instead of ``class``) + +Attributes are always qualified, and share the namespace of the element they are +attached to. + +Mixed content (elements containing both text and child elements) is not supported.
+""" + +import xml.etree.ElementTree as ET +from typing import Dict, Iterable, Union + + +def _namespacify(ns: str, s: str) -> str: + return "{%s}%s" % (ns, s) + + +_Children = Union[None, Dict[str, str], ET.Element, Iterable["_Children"]] + + +class ElementFactory: + def __init__(self, namespace: str, tag: str): + self._tag = _namespacify(namespace, tag) + self._namespace = namespace + + def __call__(self, *args: Union[str, _Children], **kwargs: str) -> ET.Element: + e = ET.Element(self._tag) + + attributes = {k.rstrip("_"): v for (k, v) in kwargs.items()} + children = [*args, attributes] + + if args and isinstance(children[0], str): + e.text = children[0] + children.pop(0) + + for child in children: + self._append_child(e, child) + + return e + + def _append_child(self, e: ET.Element, child: _Children) -> None: + if isinstance(child, ET.Element): + e.append(child) + elif child is None: + pass + elif isinstance(child, dict): + for k, v in child.items(): + e.set(_namespacify(self._namespace, k), str(v)) + elif isinstance(child, str): + raise ValueError("Mixed content is not supported") + else: + for grandchild in child: + self._append_child(e, grandchild) + + +class Namespace: + def __init__(self, uri: str): + self.uri = uri + + def __getattr__(self, tag: str) -> ElementFactory: + return ElementFactory(self.uri, tag) diff --git a/glowtables/sparql.py b/glowtables/sparql.py index a51d968..22a699f 100644 --- a/glowtables/sparql.py +++ b/glowtables/sparql.py @@ -16,7 +16,6 @@ import abc import json -import urllib.parse from typing import Iterable import requests @@ -50,13 +49,10 @@ class RemoteSparqlBackend(SparqlBackend): "Content-Type": "application/sparql-query", "Accept": "application/json", } - params = {"query": query} resp_text = self._cache.get(self._url, query) if not resp_text: - resp_text = self._session.post( - self._url, headers=headers, data=urllib.parse.urlencode(params) - ).text + resp_text = self._session.post(self._url, headers=headers, 
data=query).text self._cache.set(self._url, query, resp_text) resp = json.loads(resp_text) diff --git a/glowtables/style.css b/glowtables/style.css new file mode 100644 index 0000000..f1b077d --- /dev/null +++ b/glowtables/style.css @@ -0,0 +1,9 @@ +@media (prefers-color-scheme: dark) { + body { + background-color: #121212; + color: rgba(255, 255, 255, 0.87); + } + a { + filter: invert(0.85) hue-rotate(180deg); + } +} diff --git a/glowtables/table.py b/glowtables/table.py index 98533a9..85ec303 100644 --- a/glowtables/table.py +++ b/glowtables/table.py @@ -111,6 +111,48 @@ class LiteralField(Field[_TFieldValue], Generic[_TFieldValue]): return f"OPTIONAL {{ {statement} }}." +@dataclasses.dataclass +class LabeledField(Field[_TFieldValue], Generic[_TFieldValue]): + """Field whose value is the label of the entity referenced by ``predicate``""" + + predicate: rdflib.URIRef + + default: Optional[_TFieldValue] = None + """If this is not :const:`None`, allows subjects without a statement for this field; + and use this value instead when sorting. + + This is only used when sorting, and isn't displayed.""" + + def sort_key(self, value: Optional[_TFieldValue]) -> Any: + """Function suitable as ``key`` argument to :func:`sorted`. + + Defaults to the identity function.""" + if value is None: + if self.default is None: + raise ValueError(f"{self.id} value is unexpectedly None") + return self.sort_key(self.default) + return value + + def sparql( + self, + subject_var: SparqlVariable, + object_var: SparqlVariable, + new_var: Callable[[], SparqlVariable], + ) -> str: + node_var = new_var() + statement = f""" + ?{subject_var} <{self.predicate}> ?{node_var}. + SERVICE <http://wikiba.se/ontology#label> {{ + <http://www.bigdata.com/rdf#serviceParam> <http://wikiba.se/ontology#language> "en". + ?{node_var} <http://www.w3.org/2000/01/rdf-schema#label> ?{object_var}. + }} + """ # noqa + if self.default is None: + return statement + else: + return f"OPTIONAL {{ {statement} }}." + + @dataclasses.dataclass class Table: """A table, along with its fields description.""" @@ -127,7 +169,7 @@ class Table: (by default, ``?subject``).
""" - id: Optional[str] = None + id: str """Unique within a Glowtable instance""" display_names: dict[Language, str] = dataclasses.field(default_factory=dict) @@ -188,6 +230,6 @@ class Table: """ for row in backend.query(self.sparql()): yield tuple( - None if cell is None else field.parse(cell) + None if cell is None else field.parse(cell["value"]) for (field, cell) in zip(self.fields, row) ) diff --git a/glowtables/tests/conftest.py b/glowtables/tests/conftest.py index cae9857..7d6be12 100644 --- a/glowtables/tests/conftest.py +++ b/glowtables/tests/conftest.py @@ -16,8 +16,6 @@ # pylint: disable=redefined-outer-name -import urllib.parse - import pytest import rdflib @@ -35,15 +33,28 @@ def rdflib_graph() -> rdflib.Graph: def rdflib_sparql(requests_mock, rdflib_graph: rdflib.Graph) -> RemoteSparqlBackend: """Returns a SPARQL backend instance for ``rdflib_graph``.""" + def rdflib_to_json(o) -> dict: + if isinstance(o, rdflib.Literal): + return {"type": "literal", "value": str(o)} + elif isinstance(o, rdflib.URIRef): + return {"type": "uri", "value": str(o)} + else: + raise NotImplementedError(o) + def json_callback(request, context): - params = urllib.parse.parse_qs(request.text) - (query,) = params["query"] - results = rdflib_graph.query(query) + results = rdflib_graph.query(request.text) context.status_code = 200 return { "head": {"vars": results.vars}, "results": { - "bindings": [dict(zip(results.vars, result)) for result in results] + "bindings": [ + { + k: rdflib_to_json(v) + for (k, v) in zip(results.vars, result) + if v is not None + } + for result in results + ] }, } diff --git a/glowtables/tests/table_test.py b/glowtables/tests/table_test.py index aaadcff..c09e48e 100644 --- a/glowtables/tests/table_test.py +++ b/glowtables/tests/table_test.py @@ -69,6 +69,7 @@ def test_single_literal(rdflib_sparql: SparqlBackend) -> None: rdflib.URIRef("http://example.org/display-name"), ) table = Table( + id="test-table", fields=[name_field], constraints="?subject .", ) 
@@ -103,6 +104,7 @@ def test_two_literals(rdflib_sparql: SparqlBackend) -> None: rdflib.URIRef("http://example.org/clock-frequency"), ) table = Table( + id="test-table", fields=[name_field, frequency_field], constraints="?subject .", ) @@ -139,6 +141,7 @@ def test_default_value(rdflib_sparql: SparqlBackend) -> None: default=Decimal(0), ) table = Table( + id="test-table", fields=[name_field, frequency_field], constraints="?subject .", ) @@ -170,6 +173,7 @@ def test_field_id_subject() -> None: ) with pytest.raises(ValueError, match="both subject and a field id"): Table( + id="test-table", fields=[name_field], constraints="", ) @@ -190,6 +194,7 @@ def test_field_id_clash() -> None: ) with pytest.raises(ValueError, match="has duplicate field ids: name"): Table( + id="test-table", fields=[name_field, display_name_field], constraints="", ) diff --git a/glowtables/views.py b/glowtables/views.py new file mode 100644 index 0000000..8df4b9f --- /dev/null +++ b/glowtables/views.py @@ -0,0 +1,158 @@ +# This file is part of the Glowtables software +# Copyright (C) 2023 Valentin Lorentz +# +# This program is free software: you can redistribute it and/or modify it under the +# terms of the GNU Affero General Public License version 3, as published by the +# Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License along with +# this program. If not, see <https://www.gnu.org/licenses/>.
+ +"""Minimal webapp to display Glowtables""" + +import functools +import importlib.metadata +import importlib.resources +import logging +import operator +import xml.etree.ElementTree as ET +from typing import Callable, List, TypeVar + +import flask + +from .cache import Cache +from .shortxml import Namespace +from .sparql import RemoteSparqlBackend +from .table import Language, Table + +HTML = Namespace("http://www.w3.org/1999/xhtml") +LANG = Language("en") # TODO: configurable + +logger = logging.getLogger(__name__) + +app = flask.Flask(__name__) + +TView = TypeVar("TView", bound=Callable) + + +def _sparql_backend() -> RemoteSparqlBackend: + return RemoteSparqlBackend( + "https://query.wikidata.org/sparql", + agent="Unconfigured Glowtable instance", + cache=Cache("file:sparql_cache.sqlite3"), + ) + + +def xhtml_view(f: TView) -> TView: + """Decorator for Flask views which may return XHTML as :mod:`xml.etree.ElementTree` + objects.""" + + @functools.wraps(f) + def newf(*args, **kwargs): + res = f(*args, **kwargs) + root = res.getroot() if isinstance(res, ET.ElementTree) else res + if not isinstance(root, ET.Element): + return res + xml = ET.tostring(root, default_namespace=HTML.uri) + return flask.Response(xml, mimetype="application/xhtml+xml") + + return newf # type: ignore[return-value] + + +def list_tables() -> List[Table]: + """Returns all :class:`Table` instances registered as ``glowtables.tables`` + entrypoints.""" + eps = importlib.metadata.entry_points() + if hasattr(eps, "select"): # Python >= 3.10 + table_entrypoints = list(eps.select(group="glowtables.tables")) + else: # Python 3.9: entry_points() returns a dict keyed by group name + table_entrypoints = list(eps.get("glowtables.tables", [])) # type: ignore + tables = [] + for table_entrypoint in sorted(table_entrypoints, key=operator.attrgetter("name")): + table = table_entrypoint.load() + if not isinstance(table, Table): + logger.error( + "%s is %r, which is not an instance of glowtables.table.Table", + table_entrypoint.name, + table, + ) + continue + tables.append(table) + + return tables + + +@app.route("/") +@xhtml_view +def index() -> ET.Element: + """Displays the list of
tables.""" + tables = list_tables() + + return HTML.html( + HTML.head( + HTML.title("Glowtables"), + HTML.link(rel="stylesheet", type="text/css", href="/style.css"), + ), + HTML.body( + HTML.h1("Glowtables"), + HTML.ul( + [ + HTML.li( + HTML.a(table.display_names[LANG], href=f"/tables/{table.id}/") + ) + for table in tables + ] + ) + if tables + else HTML.p( + """ + There are no tables defined, check the Glowtables documentation + to find how to configure them. + """ + ), + ), + ) + + +@app.route("/style.css") +def style() -> flask.Response: + """Serves the CSS.""" + css = importlib.resources.files(__package__).joinpath("style.css").read_bytes() + return flask.Response(css, mimetype="text/css") + + +@app.route("/tables/<table_id>/") +@xhtml_view +def table_(table_id: str) -> ET.Element: + """Displays a table.""" + tables = list_tables() + for table in tables: + if table.id == table_id: + break + else: + flask.abort(404) + + return HTML.html( + HTML.head( + HTML.title("Glowtables"), + HTML.link(rel="stylesheet", type="text/css", href="/style.css"), + ), + HTML.body( + HTML.h1(table.display_names[LANG]), + HTML.table( + HTML.thead( + HTML.tr( + HTML.th(field.display_names[LANG]) for field in table.fields + ) + ), + HTML.tbody( + HTML.tr(HTML.td("" if cell is None else str(cell)) for cell in row) + for row in table.query(_sparql_backend()) + ), + ), + ), + ) diff --git a/pyproject.toml b/pyproject.toml index df505d1..de02ffd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,9 +7,9 @@ name = "glowtables" version = "0.0.1" requires-python = ">=3.9" dependencies = [ - "flask ~= 2.0.0", - "rdflib ~= 6.0.0", - "requests ~= 3.0.0", + "flask ~= 2.0", + "rdflib ~= 6.0", + "requests ~= 2.0", ] [project.optional-dependencies] @@ -21,6 +21,9 @@ testing = [ "types-setuptools", ] +[project.entry-points."glowtables.tables"] +example_cats = "glowtables.examples.cats:cats" + [tool.isort] profile = "black" @@ -51,6 +54,7 @@ disable = [ "no-member", "unsupported-membership-test", "import-error", +
"undefined-loop-variable", # flake8 does it already: "line-too-long", ]