Skip to content

Custom md target bugfixes #697

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 23 additions & 21 deletions internetarchive/iarequest.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,12 +316,11 @@ def _prepare_single_target_body(self, metadata, source_metadata, target, append,
)
else:
patch = prepare_target_patch(
{target: metadata},
metadata,
source_metadata,
append,
target,
append_list,
target,
insert,
expect,
)
Expand Down Expand Up @@ -377,24 +376,27 @@ def _create_patch_tests(expect):


def prepare_target_patch(metadata, source_metadata, append, target,
append_list, key, insert, expect):
nested_dict = _create_nested_dict(metadata)
current = source_metadata
for part in key.split('/'):
current = current.get(part, {})
patch = prepare_patch(nested_dict, current, append, expect, append_list, insert)
return patch


def _create_nested_dict(metadata):
nested = {}
for key_path, value in metadata.items():
parts = key_path.split('/')
current = nested
for part in parts[:-1]:
current = current.setdefault(part, {})
current[parts[-1]] = value
return nested
append_list, insert, expect):
def get_nested_value(data, parts):
current = data
for part in parts:
if isinstance(current, list) and part.isdigit():
current = current[int(part)]
else:
current = current[part]
return current

key_parts = target.split('/')
current_source = get_nested_value(source_metadata, key_parts)

return prepare_patch(
metadata,
current_source,
append,
expect,
append_list,
insert,
)


def prepare_files_patch(metadata, files_metadata, target, append,
Expand Down Expand Up @@ -432,7 +434,7 @@ def _process_non_indexed_keys(metadata, source, prepared, append, append_list, i
if isinstance(value, (int, float, complex)) and not isinstance(value, bool):
value = str(value)

if append_list and source.get(current_key):
if append_list and isinstance(source, dict) and source.get(current_key):
existing = source[current_key]
if not isinstance(existing, list):
existing = [existing]
Expand Down
150 changes: 150 additions & 0 deletions tests/test_iarequest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import copy
import json

import pytest

from internetarchive.iarequest import (
MetadataRequest,
S3Request,
prepare_files_patch,
prepare_patch,
prepare_target_patch,
)
from tests.conftest import PROTOCOL, IaRequestsMock


@pytest.fixture
def sample_metadata():
    """Return a fresh copy of a small item-metadata document.

    The document has three top-level sections: ``metadata``, ``files`` (one
    entry named ``test.txt``) and a custom ``dupe_pallet_index`` mapping.
    """
    item = {
        "metadata": {"title": "Test"},
        "files": [
            {"name": "test.txt", "custom": {"tags": ["old"]}, "foo": "bar"},
        ],
        "dupe_pallet_index": {
            "IA9999": ["IA999901"],
        },
    }
    return copy.deepcopy(item)


@pytest.mark.parametrize(
    ("metadata", "expected"),
    [
        (
            {"custom": ["new"]},
            [{"op": "add", "path": "/custom", "value": ["new"]}],
        ),
        (
            {"title": "New Title"},
            [{"op": "replace", "path": "/title", "value": "New Title"}],
        ),
        (
            {"title": "REMOVE_TAG"},
            [{"op": "remove", "path": "/title"}],
        ),
    ],
)
def test_metadata_patch_operations(metadata, expected, sample_metadata):
    """Check the patch built by ``prepare_patch`` against item metadata.

    The cases cover a key missing from the source (add), an existing key
    with a new value (replace) and the ``REMOVE_TAG`` marker (remove).
    """
    item_metadata = sample_metadata["metadata"]
    result = prepare_patch(
        metadata=metadata,
        source_metadata=item_metadata,
        append=False,
        append_list=False,
        insert=False,
    )
    assert result == expected


@pytest.mark.parametrize(
    ("metadata", "expected"),
    [
        (
            {"new-key": ["new", "new2"]},
            [{"op": "add", "path": "/new-key", "value": ["new", "new2"]}],
        ),
        (
            {"custom": "foo new"},
            [{"op": "replace", "path": "/custom", "value": "foo new"}],
        ),
        (
            {"custom": "REMOVE_TAG"},
            [{"op": "remove", "path": "/custom"}],
        ),
    ],
)
def test_file_metadata_patch_operations(metadata, expected, sample_metadata):
    """Check the patch built by ``prepare_files_patch`` for ``files/test.txt``.

    The cases cover a key the file entry lacks (add), the existing
    ``custom`` key with a new value (replace) and ``REMOVE_TAG`` (remove).
    """
    file_entries = sample_metadata["files"]
    result = prepare_files_patch(
        metadata=metadata,
        files_metadata=file_entries,
        target="files/test.txt",
        append=False,
        append_list=False,
        insert=False,
        expect={},
    )
    assert result == expected


@pytest.mark.parametrize(
    ("metadata", "expected"),
    [
        (
            {"IA9999": ["UPDATED"], "NEW_ITEM": ["NEW123"]},
            [
                {"op": "add", "path": "/NEW_ITEM", "value": ["NEW123"]},
                {"op": "replace", "path": "/IA9999/0", "value": "UPDATED"},
            ],
        ),
    ],
)
def test_target_patch_add_and_replace(metadata, expected, sample_metadata):
    """Check ``prepare_target_patch`` against the ``dupe_pallet_index`` target.

    A single update both introduces a new key and changes the first element
    of an existing list; the expected patch holds one add and one replace.
    """
    result = prepare_target_patch(
        metadata=metadata,
        source_metadata=sample_metadata,
        target="dupe_pallet_index",
        append=False,
        append_list=False,
        insert=False,
        expect={},
    )
    assert result == expected


@pytest.mark.parametrize(
    ("metadata", "expected"),
    [
        (
            {"IA9999": ["IA999901", "IA999902", "IA999903"]},
            [
                {
                    "op": "add",
                    "path": "/IA9999/1",
                    "value": ["IA999901", "IA999902", "IA999903"],
                },
            ],
        ),
        (
            {"IA9999": "IA999902"},
            [{"op": "add", "path": "/IA9999/1", "value": "IA999902"}],
        ),
    ],
)
def test_target_patch_append_list(metadata, expected, sample_metadata):
    """Check ``prepare_target_patch`` with ``append_list=True``.

    Both cases update the existing ``IA9999`` list under the
    ``dupe_pallet_index`` target, once with a list and once with a string.
    """
    result = prepare_target_patch(
        metadata=metadata,
        source_metadata=sample_metadata,
        target="dupe_pallet_index",
        append=False,
        append_list=True,
        insert=False,
        expect={},
    )
    assert result == expected


def test_metadata_request_patch_key(sample_metadata):
    """Check that a prepared ``MetadataRequest`` exposes a ``-patch`` data key."""
    item_url = f"{PROTOCOL}//archive.org/metadata/test_item"
    mocked_body = json.dumps(sample_metadata)

    with IaRequestsMock() as rsps:
        # Register the sample document as the mocked metadata for the item.
        rsps.add_metadata_mock('test_item', body=mocked_body)

        request = MetadataRequest(
            metadata={"title": "New Title"},
            url=item_url,
        )
        prepared = request.prepare()
        has_patch_key = any(key.endswith('-patch') for key in prepared.data)
        assert has_patch_key


def test_scanner_header_injection():
    """Check that ``set_scanner=True`` yields a scanner header naming the library."""
    request_kwargs = {
        "method": 'PUT',
        "url": f"{PROTOCOL}//s3.us.archive.org/test_item",
        "metadata": {"title": "Test"},
        "set_scanner": True,
        "access_key": 'test_access',
        "secret_key": 'test_secret',
    }
    prepared = S3Request(**request_kwargs).prepare()

    # A missing header falls back to '' so the assertion fails cleanly.
    scanner_header = prepared.headers.get('x-archive-meta00-scanner', '')
    assert 'Internet%20Archive%20Python%20library' in scanner_header


@pytest.mark.parametrize(
    ("test_value", "expected"),
    [
        (
            "http://example.com/foo bar?q=✓",
            "uri(http%3A//example.com/foo%20bar%3Fq%3D%E2%9C%93)",
        ),
    ],
)
def test_metadata_header_uri_encoding(test_value, expected):
    """Check the ``uri(...)`` encoding of a metadata value in an S3 header.

    The value contains a space, ``?``, ``=`` and a non-ASCII character; the
    expected header is the percent-encoded form wrapped in ``uri(...)``.
    """
    request = S3Request(
        method='PUT',
        url=f"{PROTOCOL}//s3.us.archive.org/test_item",
        metadata={"source": test_value},
        access_key='test_access',
        secret_key='test_secret',
    )
    prepared = request.prepare()

    # A missing header falls back to '' so the comparison fails cleanly.
    source_header = prepared.headers.get('x-archive-meta00-source', '')
    assert source_header == expected