Compare commits

...

3 Commits

Author SHA1 Message Date
Val Lorentz 1db60954d6 Save pages returned by Save Page Now
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
This is especially useful as pages freshly archived by SPN are not
immediately shown as available by the Wayback Machine API, so we
might otherwise send a pointless SPN request right after.
2022-09-24 22:43:34 +02:00
Val Lorentz 8139bf5410 Fix crash in Save Page Now, when run without mocks 2022-09-24 22:37:22 +02:00
Val Lorentz 9782ec22ec Add a CLI to initialize the database. 2022-09-24 22:30:25 +02:00
5 changed files with 75 additions and 9 deletions

51
opdb/__main__.py Normal file
View File

@ -0,0 +1,51 @@
# This file is part of the Open Parts Database software
# Copyright (C) 2022 Valentin Lorentz
#
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU Affero General Public License version 3, as published by the
# Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.
"""
CLI entrypoint.
"""
import sys
import typing
def error(msg: str) -> typing.NoReturn:
    """Report a fatal CLI error and abort.

    Writes ``msg`` (followed by a newline) to standard error, then raises
    ``SystemExit`` with exit status 1, so this function never returns.
    """
    print(msg, file=sys.stderr)
    raise SystemExit(1)
def main(argv: typing.Optional[typing.Sequence[str]] = None) -> None:
    """CLI entrypoint.

    Dispatches on the first command-line argument. The only subcommand is
    ``initdb <libpq DSN>``, which opens the database designated by the DSN
    and initializes its schema.

    ``argv`` is the full argument vector, program name included. It defaults
    to ``sys.argv``, so calling ``main()`` without arguments behaves as
    before; passing an explicit list allows the CLI to be driven from other
    code.

    Syntax errors and unknown subcommands are reported through ``error()``,
    which prints the message on stderr and exits with code 1.
    """
    if argv is None:
        argv = sys.argv

    # The argument vector may be empty (e.g. an embedding interpreter), in
    # which case there is no program name to show in the usage message.
    program = argv[0] if argv else "opdb"

    try:
        (executable, subcommand, *args) = argv
    except ValueError:
        # Fewer than two items: no subcommand was given.
        error(f"Syntax: {program} <subcommand> [<arg1> [<arg2> [...]]]")

    if subcommand == "initdb":
        # Imported only once the subcommand is known to need the database.
        from opdb.db import Db  # pylint: disable=import-outside-toplevel

        try:
            (dsn,) = args
        except ValueError:
            # initdb takes exactly one argument.
            error(f"Syntax: {executable} initdb <libpq DSN>")

        with Db.open(dsn) as db:
            db.init()
    else:
        error(f"Unknown subcommand: {subcommand}")
# Run the CLI only when this file is executed as a script
# (e.g. ``python -m opdb``), not when it is imported.
if __name__ == "__main__":
    main()

View File

@ -18,7 +18,7 @@ pytest fixtures
import pytest
from opdb.db import Db, models
from opdb.db import Db
def iter_subclasses(cls):
@ -35,9 +35,6 @@ def opdb_db(postgresql) -> Db:
"""
pytest fixture which yields an empty initialized OPDB database.
"""
with postgresql.cursor() as cur:
for name in dir(models):
cls = getattr(models, name)
if hasattr(cls, "TABLE"):
cur.execute(cls.db_schema())
return Db(postgresql)
db = Db(postgresql)
db.init()
return db

View File

@ -44,6 +44,16 @@ class Db:
with psycopg.connect(dsn) as conn:
yield Db(conn)
def init(self) -> None:
    """
    Creates the database schema on the current connection.

    Every attribute of the ``models`` module that has a ``TABLE`` attribute
    is treated as a table model, and the statement returned by its
    ``db_schema()`` is executed. Attributes are visited in ``dir()`` order.
    """
    with self.conn.cursor() as cursor:
        for attr_name in dir(models):
            candidate = getattr(models, attr_name)
            if not hasattr(candidate, "TABLE"):
                # Not a table model; nothing to create for it.
                continue
            cursor.execute(candidate.db_schema())
def get_last_web_page_snapshot(
self, url: str
) -> typing.Optional[models.WebPageSnapshot]:

View File

@ -123,7 +123,9 @@ class Session:
)
def _save_page_now(self, url: str) -> models.WebPageSnapshot:
response = self._session.get(f"https://web.archive.org/save/{url}")
response = self._session.get(
f"https://web.archive.org/save/{url}", allow_redirects=False
)
response.raise_for_status() # TODO: retry
wayback_url = response.headers["Location"]
return self._fetch_wayback_snapshot(url, wayback_url)
@ -155,4 +157,6 @@ class Session:
# If the Internet Archive does not have it yet, trigger its Save Page Now,
# and query the Wayback Machine again
return self._save_page_now(url)
snapshot = self._save_page_now(url)
self._db.add_web_page_snapshots([snapshot])
return snapshot

View File

@ -148,6 +148,8 @@ def test_get__uncached__recent_wb(configured_requests_mocker, opdb_db: Db):
content=b"Example page content from Wayback Machine",
)
assert snapshot == opdb_db.get_last_web_page_snapshot("http://example.org/")
assert [(r.method, r.url) for r in configured_requests_mocker.request_history] == [
(
"GET",
@ -214,6 +216,8 @@ def test_get__uncached__expired_wb(configured_requests_mocker, opdb_db: Db):
content=b"Example page content from Wayback Machine after Save Page Now",
)
assert snapshot == opdb_db.get_last_web_page_snapshot("http://example.org/")
assert [(r.method, r.url) for r in configured_requests_mocker.request_history] == [
(
"GET",