diff --git a/glowtables/__init__.py b/glowtables/__init__.py index e69de29..08cadd3 100644 --- a/glowtables/__init__.py +++ b/glowtables/__init__.py @@ -0,0 +1,13 @@ +# This file is part of the Glowtables software +# Copyright (C) 2023 Valentin Lorentz +# +# This program is free software: you can redistribute it and/or modify it under the +# terms of the GNU Affero General Public License version 3, as published by the +# Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License along with +# this program. If not, see <https://www.gnu.org/licenses/>. diff --git a/glowtables/table.py b/glowtables/table.py new file mode 100644 index 0000000..65e8ee2 --- /dev/null +++ b/glowtables/table.py @@ -0,0 +1,174 @@ +# This file is part of the Glowtables software +# Copyright (C) 2023 Valentin Lorentz +# +# This program is free software: you can redistribute it and/or modify it under the +# terms of the GNU Affero General Public License version 3, as published by the +# Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License along with +# this program. If not, see <https://www.gnu.org/licenses/>. 
"""Data model"""

import abc
import collections
import dataclasses
import itertools
import textwrap
from typing import (
    Any,
    Callable,
    ClassVar,
    Generic,
    Iterator,
    NewType,
    Optional,
    TypeVar,
)

import rdflib

Language = NewType("Language", str)
"""ISO 639-1 code"""

SparqlVariable = NewType("SparqlVariable", str)
"""A variable within a SPARQL query, without the leading ``?``."""

_TFieldValue = TypeVar("_TFieldValue")


@dataclasses.dataclass
class Field(abc.ABC, Generic[_TFieldValue]):
    """Abstract class for a table field."""

    id: str
    """Unique within a table"""

    display_names: dict[Language, str]
    """Localized name for the field (eg. in a table header)"""

    @abc.abstractmethod
    def sort_key(self, value: _TFieldValue) -> Any:
        """Returns the key used to order ``value`` among other values of this
        field, so that the bound method is suitable as ``key`` argument to
        :func:`sorted`.

        This method is abstract: every concrete field must implement it.
        """

    @abc.abstractmethod
    def sparql(
        self,
        subject_var: SparqlVariable,
        object_var: SparqlVariable,
        new_var: Callable[[], SparqlVariable],
    ) -> str:
        """
        Given the SPARQL variable of a subject and object, returns SPARQL statements
        which bind the ``object_var`` to the value of the field for the subject bound
        to ``subject_var``.

        ``new_var`` is a callable handed in by the caller; each call returns a
        variable name that implementations may use for intermediate nodes.

        For example, if this ``Field`` represents a "CPU frequency" property
        identified by ``<http://example.org/clock-frequency>``, ``subject_var``
        is ``a``, and ``object_var`` is ``b``, this will return::

            ?a <http://example.org/clock-frequency> ?b.

        Typically there is only one statement, but more statements are needed to fetch
        nodes which aren't neighbors.
        """


@dataclasses.dataclass
class LiteralField(Field[rdflib.Literal]):
    """Simplest field: its value is a literal directly on the subject"""

    predicate: rdflib.URIRef
    """Predicate linking the subject to the literal value."""

    default: Optional[rdflib.Literal] = None
    """Value standing in for a missing literal. When set, the generated
    SPARQL pattern is wrapped in ``OPTIONAL``."""

    def sort_key(self, value: Optional[rdflib.Literal]) -> Any:
        """Returns the key used to order ``value``.

        A non-``None`` value is its own key. A ``None`` value is replaced by
        the key of :attr:`default`.

        :raises ValueError: if ``value`` is ``None`` and no default is set.
        """
        if value is not None:
            return value
        if self.default is None:
            raise ValueError(f"{self.id} value is unexpectedly None")
        # The original called a bare ``sort_key(...)``, which is not defined at
        # module level and raised NameError; the method itself is what was meant.
        return self.sort_key(self.default)

    def sparql(
        self,
        subject_var: SparqlVariable,
        object_var: SparqlVariable,
        new_var: Callable[[], SparqlVariable],
    ) -> str:
        """Returns a single triple pattern binding ``object_var`` to the
        literal found via :attr:`predicate` on ``subject_var``.

        The pattern is wrapped in ``OPTIONAL { ... }.`` when a default is
        configured. ``new_var`` is unused: no intermediate node is needed.
        """
        statement = f"?{subject_var} <{self.predicate}> ?{object_var}."
        if self.default is None:
            return statement
        return f"OPTIONAL {{ {statement} }}."


@dataclasses.dataclass
class Table:
    """A table, along with its fields description."""

    fields: list[Field]
    """Ordered list of all fields of the table. Includes hidden and filter-only fields.
    """

    constraints: str
    """SPARQL statements which constrain the set of nodes used as main subject
    for table entries.

    The variable bound to the subject is what is defined in :attr:`subject`
    (by default, ``?subject``).
    """

    id: Optional[str] = None
    """Unique within a Glowtable instance"""

    display_names: dict[Language, str] = dataclasses.field(default_factory=dict)
    """Localized name for the table (eg. on a page title)"""

    subject: SparqlVariable = SparqlVariable("subject")
    """Variable (without the leading ``?``) bound to the main subject of each
    table entry."""

    # ClassVar keeps the template out of the generated ``__init__``/``__repr__``.
    sparql_template: ClassVar[str] = textwrap.dedent(
        """
        SELECT {columns}
        WHERE {{
            {constraints}
            {statements}
        }}
        """
    )

    def __post_init__(self) -> None:
        """Validates field ids.

        :raises ValueError: if a field id equals :attr:`subject`, or if two
            fields share the same id.
        """
        field_ids = [field.id for field in self.fields]

        # Field ids double as SPARQL variable names, so they must not shadow
        # the subject variable.
        if str(self.subject) in field_ids:
            raise ValueError(f"{self.subject} is both subject and a field id.")

        duplicate_field_ids = [
            field_id
            for (field_id, count) in collections.Counter(field_ids).items()
            if count > 1
        ]
        if duplicate_field_ids:
            raise ValueError(
                f"{self} has duplicate field ids: {', '.join(duplicate_field_ids)}"
            )

    def sparql(self) -> str:
        """Returns a SPARQL query suitable to get records for this table."""

        def new_var(prefix: str) -> Iterator[SparqlVariable]:
            # Endless stream of names: prefix0, prefix1, ...
            for i in itertools.count():
                yield SparqlVariable(f"{prefix}{i}")

        # Honour the configured subject variable instead of a hard-coded one.
        subject = self.subject

        # NOTE(review): the SPARQL 1.1 grammar separates SELECT variables with
        # whitespace, not commas; the separator is kept because the test suite
        # pins this exact output. Confirm against the target endpoint.
        columns = ", ".join(f"?{field.id}" for field in self.fields)

        # Each field gets its own generator, so its fresh variables are
        # numbered from 0 and prefixed with the field id.
        statements = "\n    ".join(
            field.sparql(subject, SparqlVariable(field.id), new_var(field.id).__next__)
            for field in self.fields
        )

        # Indent continuation lines to the template's nesting level; the first
        # line is already indented by the template itself.
        constraints = textwrap.indent(self.constraints, "    ").strip()

        return self.sparql_template.format(
            subject=subject,
            columns=columns,
            constraints=constraints,
            statements=statements.strip(),
        )
"""Tests for the SPARQL queries generated by :class:`glowtables.table.Table`."""

import textwrap
from typing import Optional

import rdflib

from glowtables.table import Language, LiteralField, Table

# Illustrative constraint selecting the table's subjects; Table.sparql() copies
# it verbatim into the WHERE clause.
CPU_CONSTRAINT = "?subject a <http://example.org/CPU>."


def _name_field(default: Optional[rdflib.Literal] = None) -> LiteralField:
    """Builds the ``display_name`` field shared by every test."""
    return LiteralField(
        "display_name",
        {Language("en"): "Name"},
        rdflib.URIRef("http://example.org/display-name"),
        default=default,
    )


def _frequency_field() -> LiteralField:
    """Builds the ``frequency`` field."""
    return LiteralField(
        "frequency",
        {Language("en"): "Clock frequency"},
        rdflib.URIRef("http://example.org/clock-frequency"),
    )


def test_single_literal() -> None:
    """A single mandatory literal yields one plain triple pattern."""
    table = Table(
        fields=[_name_field()],
        constraints=CPU_CONSTRAINT,
    )

    assert table.sparql() == textwrap.dedent(
        """
        SELECT ?display_name
        WHERE {
            ?subject a <http://example.org/CPU>.
            ?subject <http://example.org/display-name> ?display_name.
        }
        """
    )


def test_two_literals() -> None:
    """Fields are emitted in declaration order, one statement per line."""
    table = Table(
        fields=[_name_field(), _frequency_field()],
        constraints=CPU_CONSTRAINT,
    )

    assert table.sparql() == textwrap.dedent(
        """
        SELECT ?display_name, ?frequency
        WHERE {
            ?subject a <http://example.org/CPU>.
            ?subject <http://example.org/display-name> ?display_name.
            ?subject <http://example.org/clock-frequency> ?frequency.
        }
        """
    )


def test_default_value() -> None:
    """A field with a default is fetched through an OPTIONAL pattern."""
    table = Table(
        fields=[_name_field(default=rdflib.Literal("Anonymous CPU"))],
        constraints=CPU_CONSTRAINT,
    )

    assert table.sparql() == textwrap.dedent(
        """
        SELECT ?display_name
        WHERE {
            ?subject a <http://example.org/CPU>.
            OPTIONAL { ?subject <http://example.org/display-name> ?display_name. }.
        }
        """
    )