Skip to content

Commit 67696d0

Browse files
author
Víctor Ruiz
authored
Merge pull request #13 from scrapy-plugins/draft-support
Added support for additional schemas
2 parents cd58af6 + 4c61f5d commit 67696d0

10 files changed

+221
-97
lines changed

dev-requiremens.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pytest

scrapy_jsonschema/draft.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
JSON_SCHEMA_DRAFT_3 = "http://json-schema.org/draft-03/schema#"
2+
JSON_SCHEMA_DRAFT_4 = "http://json-schema.org/draft-04/schema#"
3+
JSON_SCHEMA_DRAFT_6 = "http://json-schema.org/draft-06/schema#"
4+
JSON_SCHEMA_DRAFT_7 = "http://json-schema.org/draft-07/schema#"

scrapy_jsonschema/item.py

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,27 @@
11
from abc import ABCMeta
22

33
import six
4-
from jsonschema import Draft4Validator
4+
from jsonschema import (
5+
Draft3Validator,
6+
Draft4Validator,
7+
Draft6Validator,
8+
Draft7Validator,
9+
)
10+
11+
from scrapy_jsonschema.draft import (
12+
JSON_SCHEMA_DRAFT_3,
13+
JSON_SCHEMA_DRAFT_4,
14+
JSON_SCHEMA_DRAFT_6,
15+
JSON_SCHEMA_DRAFT_7,
16+
)
17+
518
from scrapy.item import DictItem, Field
619

720

821
def _merge_schema(base, new):
9-
if base is None:
10-
return new
11-
if new is None:
12-
return base
22+
if base is None or new is None:
23+
return base or new
24+
1325
if all(isinstance(x, dict) for x in (base, new)):
1426
return {
1527
key: _merge_schema(base.get(key), new.get(key))
@@ -21,6 +33,14 @@ def _merge_schema(base, new):
2133

2234

2335
class JsonSchemaMeta(ABCMeta):
36+
37+
draft_to_validator = {
38+
JSON_SCHEMA_DRAFT_3: Draft3Validator,
39+
JSON_SCHEMA_DRAFT_4: Draft4Validator,
40+
JSON_SCHEMA_DRAFT_6: Draft6Validator,
41+
JSON_SCHEMA_DRAFT_7: Draft7Validator,
42+
}
43+
2444
def __new__(mcs, class_name, bases, attrs):
2545
cls = super(JsonSchemaMeta, mcs).__new__(mcs, class_name, bases, attrs)
2646
fields = {}
@@ -33,20 +53,28 @@ def __new__(mcs, class_name, bases, attrs):
3353
schema = _merge_schema(schema, base_schema)
3454
setattr(cls, 'jsonschema', schema)
3555
if not schema:
36-
raise ValueError('{} must contain "jsonschema" attribute'
37-
.format(cls.__name__))
38-
cls.validator = Draft4Validator(schema)
56+
raise ValueError(
57+
'{} must contain "jsonschema" attribute'.format(cls.__name__)
58+
)
59+
cls.validator = cls._get_validator(schema)
3960
cls.validator.check_schema(schema)
4061
for k in schema['properties']:
4162
fields[k] = Field()
4263
cls.fields = cls.fields.copy()
4364
cls.fields.update(fields)
4465
return cls
4566

67+
@classmethod
68+
def _get_validator(cls, schema):
69+
draft_version = schema.get('$schema')
70+
# Default to Draft4Validator for backward-compatibility
71+
validator_class = cls.draft_to_validator.get(
72+
draft_version, Draft4Validator
73+
)
74+
return validator_class(schema)
75+
4676

4777
@six.add_metaclass(JsonSchemaMeta)
4878
class JsonSchemaItem(DictItem):
49-
jsonschema = {
50-
"properties": {}
51-
}
79+
jsonschema = {"properties": {}}
5280
merge_schema = False # Off for backward-compatibility

scrapy_jsonschema/pipeline.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ def process_item(self, item, spider):
3737
error_msg = ''
3838
for path, message in paths_messages:
3939
error_msg += u'{}: {}\n'.format(path, message)
40-
raise DropItem(u'schema validation failed: \n {}'
41-
.format(error_msg))
40+
raise DropItem(u'schema validation failed: \n {}'.format(error_msg))
4241

4342
return item

tests/__init__.py

Lines changed: 32 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,91 +1,63 @@
1+
from scrapy_jsonschema.draft import JSON_SCHEMA_DRAFT_7
12

23
valid_schema = {
4+
"$schema": JSON_SCHEMA_DRAFT_7,
35
"title": "Product",
46
"description": "Some product's description",
57
"type": "object",
68
"properties": {
79
"id": {
810
"description": "The unique identifier for a product",
9-
"type": "integer"
10-
},
11-
"name": {
12-
"description": "Name of the product",
13-
"type": "string"
11+
"type": "integer",
1412
},
13+
"name": {"description": "Name of the product", "type": "string"},
1514
"prices": {
1615
"type": "array",
1716
"items": {
1817
"type": "object",
1918
"properties": {
20-
"name": {
21-
"type": "string",
22-
},
23-
"value": {
24-
"type": "integer",
25-
}
19+
"name": {"type": "string"},
20+
"value": {"type": "integer"},
2621
},
2722
"required": ["name", "value"],
28-
}
29-
}
23+
},
24+
},
3025
},
31-
"required": ["id", "name"]
26+
"required": ["id", "name"],
3227
}
3328

34-
invalid_schema = {
35-
"type": "invalid-type"
36-
}
29+
invalid_schema = {"type": "invalid-type"}
3730

3831
merge_schema_base = {
39-
'required': ['bar'],
40-
'properties': {
41-
'bar': {
42-
'type': 'string',
43-
},
44-
},
32+
"required": ["bar"],
33+
"properties": {"bar": {"type": "string"}},
4534
}
4635

47-
merge_schema_base_2 = {
48-
'properties': {
49-
'foo': {
50-
'type': 'string',
51-
},
52-
}
53-
}
36+
merge_schema_base_2 = {"properties": {"foo": {"type": "string"}}}
5437

5538
merge_schema_new = {
56-
'required': ['foo'],
57-
'properties': {
58-
'foo': {
59-
'type': 'string',
60-
'pattern': r'^\d+$',
61-
},
62-
'baz': {
63-
'type': 'string',
64-
},
39+
"required": ["foo"],
40+
"properties": {
41+
"foo": {"type": "string", "pattern": r"^\d+$"},
42+
"baz": {"type": "string"},
6543
},
6644
}
6745

68-
merged_valid_docs = [
69-
{
70-
'foo': '123',
71-
'bar': 'baz',
72-
'baz': 'bar',
73-
},
74-
]
46+
merged_valid_docs = [{"foo": "123", "bar": "baz", "baz": "bar"}]
7547

7648
merged_invalid_docs = [
7749
{
7850
# valid for base
79-
'bar': 'baz',
51+
"bar": "baz"
8052
},
8153
{
8254
# valid for base 2
83-
'foo': '123',
55+
"foo": "123"
8456
},
8557
{
8658
# valid for new
87-
'foo': '123',
88-
'baz': 'bar',
59+
"foo": "123",
60+
"baz": "bar",
8961
},
9062
]
9163

@@ -94,27 +66,23 @@
9466
"id": 123,
9567
"name": "name",
9668
"prices": [
97-
{
98-
"name": "price1",
99-
"value": 100
100-
},
101-
{
102-
"name": "price2",
103-
"value": 200
104-
}
105-
]
69+
{"name": "price1", "value": 100},
70+
{"name": "price2", "value": 200},
71+
],
10672
}
10773
]
10874

10975
invalid_doc_types = [
11076
{
111-
"id": '123', # this value should be an integer, not its string representation
112-
"name": "name"
77+
"id": "123", # this value should be an integer, not a string
78+
"name": "name",
11379
},
11480
{
11581
"id": 123,
116-
"name": ["name"] # this value should be a string, not a list of strings
117-
}
82+
"name": [
83+
"name"
84+
], # this value should be a string, not a list of strings
85+
},
11886
]
11987

12088
invalid_doc_required = [
@@ -125,5 +93,5 @@
12593
{
12694
# 'name' is missing
12795
"id": {}
128-
}
96+
},
12997
]

tests/test_draf.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from unittest import TestCase
2+
3+
from scrapy_jsonschema.draft import (
4+
JSON_SCHEMA_DRAFT_3,
5+
JSON_SCHEMA_DRAFT_4,
6+
JSON_SCHEMA_DRAFT_6,
7+
JSON_SCHEMA_DRAFT_7,
8+
)
9+
10+
11+
class DraftTest(TestCase):
12+
"Test that the draft constant values haven't been changed by mistake"
13+
14+
def test_draft_3(self):
15+
assert JSON_SCHEMA_DRAFT_3 == "http://json-schema.org/draft-03/schema#"
16+
17+
def test_draft_4(self):
18+
assert JSON_SCHEMA_DRAFT_4 == "http://json-schema.org/draft-04/schema#"
19+
20+
def test_draft_6(self):
21+
assert JSON_SCHEMA_DRAFT_6 == "http://json-schema.org/draft-06/schema#"
22+
23+
def test_draft_7(self):
24+
assert JSON_SCHEMA_DRAFT_7 == "http://json-schema.org/draft-07/schema#"

tests/test_item.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import pytest
2+
from unittest import TestCase
3+
4+
from jsonschema.exceptions import SchemaError
5+
from jsonschema import Draft4Validator, Draft7Validator
6+
7+
from scrapy_jsonschema.item import JsonSchemaItem, _merge_schema
8+
from scrapy_jsonschema.draft import JSON_SCHEMA_DRAFT_7
9+
10+
from . import (
11+
valid_schema,
12+
invalid_schema,
13+
merge_schema_base,
14+
merge_schema_base_2,
15+
merge_schema_new,
16+
)
17+
18+
19+
class ValidSchemaTestCase(TestCase):
20+
21+
schema1 = {
22+
"both": 1,
23+
"only_base": 2,
24+
"nested": {"list_to_merge": [1, 2], "both": "foo"},
25+
}
26+
schema2 = {
27+
"both": 3,
28+
"only_new": 4,
29+
"nested": {"list_to_merge": [3], "both": "bar", "only_new": "baz"},
30+
}
31+
32+
def test_no_schema(self):
33+
with pytest.raises(ValueError):
34+
35+
class TestNoSchema(JsonSchemaItem):
36+
jsonschema = None
37+
38+
def test_invalid_schema(self):
39+
with pytest.raises(SchemaError):
40+
41+
class TestItem1(JsonSchemaItem):
42+
jsonschema = invalid_schema
43+
44+
def test_valid_schema(self):
45+
class TestItem2(JsonSchemaItem):
46+
jsonschema = valid_schema
47+
48+
def test_merge_schema_func(self):
49+
expected = {
50+
"both": 1,
51+
"only_base": 2,
52+
"only_new": 4,
53+
"nested": {
54+
"list_to_merge": [1, 2, 3],
55+
"both": "foo",
56+
"only_new": "baz",
57+
},
58+
}
59+
self.assertEqual(_merge_schema(self.schema1, self.schema2), expected)
60+
61+
def test_merge_schema_none(self):
62+
self.assertEqual(_merge_schema(self.schema1, None), self.schema1)
63+
self.assertEqual(_merge_schema(None, self.schema1), self.schema1)
64+
65+
def test_merge_schema(self):
66+
class Base(JsonSchemaItem):
67+
jsonschema = merge_schema_base
68+
69+
class Base2(JsonSchemaItem):
70+
jsonschema = merge_schema_base_2
71+
72+
class Merged(Base, Base2):
73+
jsonschema = merge_schema_new
74+
merge_schema = True
75+
76+
def test_get_validator(self):
77+
schema = {
78+
"$schema": JSON_SCHEMA_DRAFT_7,
79+
"title": "Item with Schema Draft",
80+
}
81+
82+
draft7_validator = JsonSchemaItem._get_validator(schema)
83+
self.assertTrue(isinstance(draft7_validator, Draft7Validator))
84+
85+
no_draft_chema = {"title": "Item without schema Draft"}
86+
default_validator = JsonSchemaItem._get_validator(no_draft_chema)
87+
self.assertTrue(isinstance(default_validator, Draft4Validator))

0 commit comments

Comments
 (0)