diff --git a/.woodpecker.yml b/.woodpecker.yml
index 6cecf48..9b41b4b 100644
--- a/.woodpecker.yml
+++ b/.woodpecker.yml
@@ -20,14 +20,15 @@ pipeline:
test-py3.9:
group: test
image: python:3.9
- commands:
+ commands: &test_commands
+ - apt-get update
+ - apt-get install -y postgresql
- pip3 install mypy .[testing]
- make mypy
- - make pytest
+ # pytest-postgresql runs pg_ctl, which refuses to run as root
+ - adduser --disabled-password --gecos "" pytest
+ - su pytest -c 'make pytest'
test-py3.10:
group: test
image: python:3.10
- commands:
- - pip3 install mypy .[testing]
- - make mypy
- - make pytest
+ commands: *test_commands
diff --git a/opdb/conftest.py b/opdb/conftest.py
new file mode 100644
index 0000000..1a7d0ff
--- /dev/null
+++ b/opdb/conftest.py
@@ -0,0 +1,43 @@
+# This file is part of the Open Parts Database software
+# Copyright (C) 2022 Valentin Lorentz
+#
+# This program is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Affero General Public License version 3, as published by the
+# Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+pytest fixtures
+"""
+
+import pytest
+
+from opdb.db import Db, models
+
+
+def iter_subclasses(cls):
+    """Yield ``cls``, then every direct and indirect subclass, depth-first."""
+    pending = [cls]
+    while pending:
+        current = pending.pop()
+        yield current
+        pending.extend(reversed(current.__subclasses__()))
+
+
+@pytest.fixture
+def opdb_db(postgresql) -> Db:
+    """
+    pytest fixture: a Db whose tables (one per ``models`` attribute with TABLE) exist.
+    """
+    candidates = (getattr(models, attr_name) for attr_name in dir(models))
+    with postgresql.cursor() as cursor:
+        for model in candidates:
+            if hasattr(model, "TABLE"):
+                cursor.execute(model.db_schema())
+    return Db(postgresql)
diff --git a/opdb/db/__init__.py b/opdb/db/__init__.py
new file mode 100644
index 0000000..429b06d
--- /dev/null
+++ b/opdb/db/__init__.py
@@ -0,0 +1,20 @@
+# This file is part of the Open Parts Database software
+# Copyright (C) 2022 Valentin Lorentz
+#
+# This program is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Affero General Public License version 3, as published by the
+# Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+Database management
+"""
+
+from . import models, orm # noqa
+from .db import Db # noqa
diff --git a/opdb/db/db.py b/opdb/db/db.py
new file mode 100644
index 0000000..0d5e679
--- /dev/null
+++ b/opdb/db/db.py
@@ -0,0 +1,65 @@
+# This file is part of the Open Parts Database software
+# Copyright (C) 2022 Valentin Lorentz
+#
+# This program is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Affero General Public License version 3, as published by the
+# Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+Abstraction over the postgresql database used by OPDB
+"""
+
+from __future__ import annotations
+
+import contextlib
+import typing
+
+import psycopg
+
+from . import models
+
+
+class Db:
+    """Abstraction over the postgresql database used by OPDB."""
+
+    def __init__(self, conn: psycopg.Connection):
+        self.conn = conn
+
+    @classmethod
+    @contextlib.contextmanager
+    def open(cls, dsn: str) -> typing.Iterator[Db]:
+        """
+        Context manager, which yields an instance of this class (so subclasses
+        get their own type) given a libpq connection string (DSN)
+        """
+        with psycopg.connect(dsn) as conn:
+            yield cls(conn)
+
+    def get_last_web_page_snapshot(
+        self, url: str
+    ) -> typing.Optional[models.WebPageSnapshot]:
+        """
+        Returns the most recent snapshot (by ``snapshot_date``) of the given IRI,
+        or :const:`None` if there is none.
+        """
+        row_factory = psycopg.rows.class_row(models.WebPageSnapshot)
+        with self.conn.cursor(row_factory=row_factory) as cur:
+            cur.execute(
+                "SELECT * FROM web_page_snapshot WHERE url=%s "
+                "ORDER BY snapshot_date DESC LIMIT 1",
+                (url,),
+            )
+            return cur.fetchone()
+
+    def add_web_page_snapshots(
+        self, snapshots: typing.Iterable[models.WebPageSnapshot]
+    ) -> None:
+        """Stores new snapshots of web pages to the database."""
+        models.WebPageSnapshot.copy_to_db(self.conn, snapshots)
diff --git a/opdb/db/db_test.py b/opdb/db/db_test.py
new file mode 100644
index 0000000..a9cc31a
--- /dev/null
+++ b/opdb/db/db_test.py
@@ -0,0 +1,42 @@
+# This file is part of the Open Parts Database software
+# Copyright (C) 2022 Valentin Lorentz
+#
+# This program is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Affero General Public License version 3, as published by the
+# Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+Tests basic insertion and retrieval functions
+"""
+
+import datetime
+
+from opdb.db import Db, models
+
+
+def test_missing_web_page_snapshot(opdb_db: Db):
+    """Checks that looking up a URL which has no stored snapshot yields None."""
+    assert opdb_db.get_last_web_page_snapshot(url="http://nonexistent.org") is None
+
+
+def test_add_web_page_snapshot(opdb_db: Db):
+    """Checks that a stored web page snapshot is read back unchanged."""
+    url = "http://example.org/"
+    retrieved_at = datetime.datetime.now(tz=datetime.timezone.utc)
+    stored = models.WebPageSnapshot(
+        url=url,
+        snapshot_date=datetime.datetime.now(tz=datetime.timezone.utc),
+        snapshot_url=None,
+        retrieved_at=retrieved_at,
+        response_headers={"Content-Length": "7"},
+        content=b"foo bar",
+    )
+    opdb_db.add_web_page_snapshots([stored])
+    assert opdb_db.get_last_web_page_snapshot(url) == stored
diff --git a/opdb/db/models.py b/opdb/db/models.py
new file mode 100644
index 0000000..11dee3f
--- /dev/null
+++ b/opdb/db/models.py
@@ -0,0 +1,51 @@
+# This file is part of the Open Parts Database software
+# Copyright (C) 2022 Valentin Lorentz
+#
+# This program is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Affero General Public License version 3, as published by the
+# Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+Classes representing objects in the postgresql database
+"""
+
+import dataclasses
+import datetime
+import typing
+
+from .orm import BaseModel as _BaseModel
+
+
+@dataclasses.dataclass(frozen=True)
+class WebPageSnapshot(_BaseModel):
+ """Local cache of a live webpage: one row per (url, snapshot_date) pair."""
+
+ TABLE = "web_page_snapshot" # name of the SQL table storing instances
+ PK = ("url", "snapshot_date") # columns of the table's unique index
+
+ url: str
+ """IRI of the page"""
+
+ snapshot_date: datetime.datetime
+ """Moment the snapshot was taken from the live website"""
+
+ snapshot_url: typing.Optional[str]
+ """IRI where the page was downloaded from (:const:`None` unless the snapshot
+ was downloaded from a proxy)."""
+
+ retrieved_at: datetime.datetime
+ """Moment the snapshot was downloaded by opdb and inserted in the DB (differs from
+ :attr:`snapshot_date` if the snapshot was taken by a proxy)."""
+
+ response_headers: dict[str, str]
+ """Response headers of the webpage"""
+
+ content: bytes
+ """Content of the webpage."""
diff --git a/opdb/db/models_test.py b/opdb/db/models_test.py
new file mode 100644
index 0000000..80ccb62
--- /dev/null
+++ b/opdb/db/models_test.py
@@ -0,0 +1,49 @@
+# This file is part of the Open Parts Database software
+# Copyright (C) 2022 Valentin Lorentz
+#
+# This program is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Affero General Public License version 3, as published by the
+# Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+Tests the ORM
+"""
+
+import datetime
+
+import pytest
+
+from opdb.db import models
+
+
+def test_naive_datetime():
+    """Tests that a naive datetime in either datetime field raises a TypeError."""
+    tz_date = datetime.datetime.now(tz=datetime.timezone.utc)
+    naive_date = datetime.datetime.now()
+
+    with pytest.raises(TypeError, match="timezone-aware datetime"):
+        models.WebPageSnapshot(
+            url="http://example.org/",
+            snapshot_date=tz_date,
+            snapshot_url=None,
+            retrieved_at=naive_date,
+            response_headers={"Content-Length": "7"},
+            content=b"foo bar",
+        )
+
+    with pytest.raises(TypeError, match="timezone-aware datetime"):
+        models.WebPageSnapshot(
+            url="http://example.org/",
+            snapshot_date=naive_date,
+            snapshot_url=None,
+            retrieved_at=tz_date,
+            response_headers={"Content-Length": "7"},
+            content=b"foo bar",
+        )
diff --git a/opdb/db/orm.py b/opdb/db/orm.py
new file mode 100644
index 0000000..29ecc7b
--- /dev/null
+++ b/opdb/db/orm.py
@@ -0,0 +1,157 @@
+# This file is part of the Open Parts Database software
+# Copyright (C) 2022 Valentin Lorentz
+#
+# This program is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Affero General Public License version 3, as published by the
+# Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+A minimalist ORM
+
+Features:
+
+* generates postgresql schemas
+* provides easy access to postgresql's COPY TO (even for jsonb columns)
+* checks :class:`datetime.datetime` objects are timezone-aware.
+"""
+
+import dataclasses
+import datetime
+import json
+import typing
+
+import psycopg
+
+_TSelf = typing.TypeVar("_TSelf", bound="BaseModel")
+
+_TYPE_TO_SQL = {
+    datetime.datetime: "timestamptz",
+    str: "text",
+    bytes: "bytea",
+    dict: "jsonb",
+}
+
+
+def _type_to_sql(type_: type, *, nullable=False) -> str:
+    """
+    Returns the postgresql type (followed by ``NOT NULL`` unless ``nullable``)
+    of a column storing values annotated as ``type_``.
+    ``Optional[T]`` is a nullable ``T``; other generics map like their origin.
+    Raises :exc:`TypeError` on other unions, :exc:`KeyError` on unknown types.
+    """
+    origin = getattr(type_, "__origin__", None)
+    if origin is typing.Union:
+        variants = type_.__args__  # type: ignore[attr-defined]
+        # identity test: issubclass() may raise on generic-alias variants
+        non_none_variants = [v for v in variants if v is not type(None)]
+        if len(variants) != 2:
+            raise TypeError(
+                f"Unsupported type: {type_} (expected exactly 2 variants, "
+                f"got {variants!r})"
+            )
+        if len(non_none_variants) != 1:
+            raise TypeError(
+                f"Unsupported type: {type_} (expected exactly 1 non-None variant, "
+                f"got {non_none_variants!r})"
+            )
+        (inner_type,) = non_none_variants
+        # type is Optional[inner_type]
+        return _type_to_sql(inner_type, nullable=True)
+    if origin is not None:
+        # another generic type: ignore its __args__, but keep its nullability
+        return _type_to_sql(origin, nullable=nullable)
+    sql_type = _TYPE_TO_SQL[type_]
+    return sql_type if nullable else sql_type + " NOT NULL"
+
+
+class BaseModel:
+    """
+    Base class for all model classes, which provides class methods to generate
+    DB schema and efficiently insert instances.
+    """
+
+    TABLE: str
+    """Name of the SQL table."""
+
+    PK: tuple[str, ...]
+    """Primary key of the SQL table."""
+
+    __DATETIME_FIELD_NAMES: list[str]
+    __JSON_FIELD_NAMES: list[str]
+
+    def __init_subclass__(cls, *args, **kwargs):
+        """
+        Precomputes ``__DATETIME_FIELD_NAMES`` and ``__JSON_FIELD_NAMES`` on
+        class initialization, so ``__post_init__`` and ``copy_to_db`` do not need
+        to run the whole introspection machinery every time. ``Optional[T]`` is
+        classified like ``T``, and a generic alias (eg. ``dict[str, str]``) like
+        its origin.
+        """
+        super().__init_subclass__(*args, **kwargs)
+        cls.__DATETIME_FIELD_NAMES = []
+        cls.__JSON_FIELD_NAMES = []
+        for (field_name, field_type) in cls.__annotations__.items():
+            origin = getattr(field_type, "__origin__", None)
+            variants = field_type.__args__ if origin is typing.Union else (field_type,)
+            bases = [getattr(variant, "__origin__", variant) for variant in variants]
+            # skip what issubclass() rejects (eg. annotations given as strings)
+            classes = [base for base in bases if isinstance(base, type)]
+            if any(issubclass(c, datetime.datetime) for c in classes):
+                cls.__DATETIME_FIELD_NAMES.append(field_name)
+            if any(issubclass(c, dict) for c in classes):
+                cls.__JSON_FIELD_NAMES.append(field_name)
+
+    def __post_init__(self):
+        """Errors if any of the datetime fields holds a naive datetime."""
+        for field_name in self.__DATETIME_FIELD_NAMES:
+            value = getattr(self, field_name)
+            # None is a legal value of Optional[datetime] fields
+            if value is not None and value.tzinfo is None:
+                raise TypeError(f"{field_name} must be a timezone-aware datetime.")
+
+    @classmethod
+    def copy_to_db(
+        cls: type[_TSelf], conn: psycopg.Connection, objects: typing.Iterable[_TSelf]
+    ) -> None:
+        """
+        Takes a postgresql connection and an iterable of instances, and inserts
+        them efficiently in postgresql (jsonb fields are serialized, None is NULL).
+        """
+        cols = [field.name for field in dataclasses.fields(cls)]
+        with conn.cursor() as cur:
+            with cur.copy(f"COPY {cls.TABLE} ({', '.join(cols)}) FROM STDIN") as copy:
+                for obj in objects:
+                    values = [getattr(obj, col) for col in cols]
+                    row = tuple(
+                        json.dumps(value)
+                        if col in cls.__JSON_FIELD_NAMES and value is not None
+                        else value
+                        for (col, value) in zip(cols, values)
+                    )
+                    copy.write_row(row)
+
+    @classmethod
+    def db_schema(cls) -> str:
+        """
+        Returns SQL code suitable to initialize a table to store instances
+        of this class.
+        """
+        return "\n".join(
+            [
+                f"CREATE TABLE IF NOT EXISTS {cls.TABLE} (",
+                ",\n".join(
+                    f" {field.name} {_type_to_sql(field.type)}"
+                    for field in dataclasses.fields(cls)
+                ),
+                ");",
+                f"CREATE UNIQUE INDEX IF NOT EXISTS {cls.TABLE}_pk ON {cls.TABLE} "
+                f"({', '.join(cls.PK)});",
+            ]
+        )
diff --git a/opdb/db/orm_test.py b/opdb/db/orm_test.py
new file mode 100644
index 0000000..456021d
--- /dev/null
+++ b/opdb/db/orm_test.py
@@ -0,0 +1,37 @@
+# This file is part of the Open Parts Database software
+# Copyright (C) 2022 Valentin Lorentz
+#
+# This program is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Affero General Public License version 3, as published by the
+# Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+Tests the ORM
+"""
+
+import textwrap
+
+from opdb.db import models
+
+
+def test_db_schema():
+ """Tests the SQL generated for WebPageSnapshot (its table and unique index)."""
+ assert models.WebPageSnapshot.db_schema() == textwrap.dedent(
+ """\
+ CREATE TABLE IF NOT EXISTS web_page_snapshot (
+ url text NOT NULL,
+ snapshot_date timestamptz NOT NULL,
+ snapshot_url text,
+ retrieved_at timestamptz NOT NULL,
+ response_headers jsonb NOT NULL,
+ content bytea NOT NULL
+ );
+ CREATE UNIQUE INDEX IF NOT EXISTS web_page_snapshot_pk ON web_page_snapshot (url, snapshot_date);""" # noqa
+ )
diff --git a/opdb/foo_test.py b/opdb/foo_test.py
deleted file mode 100644
index 6d85ef8..0000000
--- a/opdb/foo_test.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""test"""
-
-
-def test_foo():
- """test"""
diff --git a/pyproject.toml b/pyproject.toml
index 4f8eaef..80cabe7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,7 @@ disable = [
"invalid-name",
# mypy does it better:
"no-member",
+ "import-error",
# flake8 does it already:
"line-too-long",
]
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..8dd399a
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,3 @@
+[flake8]
+max-line-length = 88
+extend-ignore = E203