Commit 1fe8f984 authored by Willem ter Berg

Update to accommodate Solr

parent dea308f6
......@@ -13,6 +13,7 @@ import ckanext.dcatdonl.schema.schema_factory as schema_factory
class DONLPlugin(plugins.SingletonPlugin, tk.DefaultDatasetForm):
plugins.implements(plugins.IDatasetForm)
plugins.implements(plugins.IValidators)
plugins.implements(plugins.IPackageController, inherit=True)
# IValidators
......@@ -75,3 +76,32 @@ class DONLPlugin(plugins.SingletonPlugin, tk.DefaultDatasetForm):
schema = schema_factory.get_schema('DCAT-AP-DONL').show_schema(schema)
return schema
def before_index(self, package_dict):
    """
    Ensures that the multivalued properties will be sent as lists to SOLR.

    String values such as '{a,b}' are unpacked into lists by removing every '{' and '}' and
    splitting the remainder on ','. Values that are not strings (for instance values that are
    already lists) are left untouched and properties that are absent are skipped.

    :param package_dict: dict, the original dictionary of the package
    :return: dict, the original, possibly modified, dictionary of the package
    """
    def _as_list(value):
        # Only strings carry the curly-brace encoding; anything without a
        # string interface is returned unchanged instead of raising.
        try:
            return value.replace('{', '').replace('}', '').split(',')
        except AttributeError:
            return value

    multivalued_properties = ['alternate_identifier', 'conforms_to', 'related_resource',
                              'source', 'version_notes', 'has_version', 'is_version_of',
                              'provenance', 'documentation', 'sample', 'theme',
                              'spatial_scheme', 'spatial_value']

    for prop in multivalued_properties:
        if prop in package_dict:
            package_dict[prop] = _as_list(package_dict[prop])

    resource_multivalued = ['language', 'download_url', 'linked_schemas', 'documentation']

    # Every resource is checked on its own, so a resource that lacks a property
    # no longer prevents the conversion of that property on the other resources.
    for resource in package_dict.get('resources') or []:
        for prop in resource_multivalued:
            if prop in resource:
                resource[prop] = _as_list(resource[prop])

    return package_dict
......@@ -29,7 +29,6 @@ def create_schema(schema):
overheid_license = tk.get_validator('overheid_license')
donl_catalogs = tk.get_validator('donl_catalogs')
donl_language = tk.get_validator('donl_language')
donl_languages = tk.get_validator('donl_languages')
donl_organization = tk.get_validator('donl_organization')
mdr_filetype_nal = tk.get_validator('mdr_filetype_nal')
iana_mediatypes = tk.get_validator('iana_mediatypes')
......@@ -39,7 +38,6 @@ def create_schema(schema):
is_uri = tk.get_validator('is_uri')
is_date = tk.get_validator('is_date')
is_list = tk.get_validator('is_list')
are_uris = tk.get_validator('are_uris')
spatial_validation = tk.get_validator('spatial_validation')
temporal_validation = tk.get_validator('temporal_validation')
date_planned_validation = tk.get_validator('date_planned_validation')
......@@ -47,52 +45,52 @@ def create_schema(schema):
hash_validation = tk.get_validator('hash_validation')
legal_foundation_validation = tk.get_validator('legal_foundation_validation')
rights_validation = tk.get_validator('rights_validation')
to_curly_string_if_list = tk.get_converter('convert_to_curly_string_if_list')
schema.update({
'identifier': [mandatory, is_string, to_extras],
'alternate_identifier': [recommended, is_list, to_extras],
# property [classification, type, content, conversion]
'identifier': [mandatory, is_string, is_uri, to_extras],
'title': [mandatory, is_string],
'notes': [mandatory, is_string],
'modified': [mandatory, is_date, to_extras],
'source_catalog': [recommended, donl_catalogs, to_extras],
'language': [mandatory, donl_languages, to_extras],
'metadata_language': [mandatory, donl_language, to_extras],
'theme': [mandatory, overheid_taxonomiebeleidsagenda, to_extras],
'changetype': [adms_changetype, to_extras],
'modified': [mandatory, is_string, is_date, to_extras],
'language': [mandatory, is_list, donl_language, to_extras],
'metadata_language': [mandatory, is_string, donl_language, to_extras],
'theme': [mandatory, is_list, overheid_taxonomiebeleidsagenda, to_extras],
'authority': [mandatory, is_string, donl_organization, to_extras],
'publisher': [mandatory, is_string, donl_organization, to_extras],
'license': [mandatory, overheid_license],
'contact_point_name': [mandatory, is_string, to_extras],
'contact_point_email': [recommended, is_string, to_extras],
'contact_point_address': [recommended, is_string, to_extras],
'contact_point_name': [mandatory, is_string, to_extras],
'contact_point_phone': [recommended, is_string, to_extras],
'contact_point_website': [recommended, is_string, to_extras],
'contact_point_website': [recommended, is_string, is_uri, to_extras],
'contact_point_title': [recommended, is_string, to_extras],
'authority': [mandatory, donl_organization, to_extras],
'publisher': [mandatory, donl_organization, to_extras],
'license': [mandatory, overheid_license],
'access_rights': [recommended, overheid_openbaarheidsniveau, to_extras],
'url': [recommended, is_uri],
'spatial_scheme': [recommended, overheid_spatial_scheme, to_extras],
'alternate_identifier': [recommended, is_list, is_uri, to_extras],
'access_rights': [recommended, is_string, overheid_openbaarheidsniveau, to_extras],
'url': [recommended, is_string, is_uri],
'spatial_scheme': [recommended, is_list, overheid_spatial_scheme, to_extras],
'spatial_value': [recommended, is_list, to_extras],
'temporal_label': [recommended, is_string, to_extras],
'temporal_start': [recommended, is_date, to_extras],
'temporal_end': [recommended, is_date, to_extras],
'temporal_start': [recommended, is_string, is_date, to_extras],
'temporal_end': [recommended, is_string, is_date, to_extras],
'conforms_to': [recommended, is_list, to_extras],
'related_resource': [recommended, are_uris, to_extras],
'source': [recommended, are_uris, to_extras],
'issued': [recommended, is_date, to_extras],
'related_resource': [recommended, is_string, is_uri, to_extras],
'source': [recommended, is_string, is_uri, to_extras],
'issued': [recommended, is_string, is_date, to_extras],
'version': [recommended, is_string],
'version_notes': [recommended, is_list, to_extras],
'has_version': [recommended, is_list, to_extras],
'is_version_of': [recommended, is_list, to_extras],
'legal_foundation_ref': [recommended, is_string, to_extras],
'legal_foundation_uri': [recommended, is_uri, to_extras],
'legal_foundation_uri': [recommended, is_string, is_uri, to_extras],
'legal_foundation_label': [recommended, is_string, to_extras],
'documentation': [optional, are_uris, to_extras],
'frequency': [optional, overheid_frequency, to_extras],
'provenance': [optional, are_uris, to_extras],
'sample': [optional, are_uris, to_extras],
'dataset_status': [recommended, overheid_dataset_status, to_extras],
'date_planned': [optional, is_date, to_extras],
'source_catalog': [recommended, is_string, donl_catalogs, to_extras],
'dataset_status': [recommended, is_string, overheid_dataset_status, to_extras],
'date_planned': [optional, is_string, is_date, to_extras],
'documentation': [optional, is_list, is_uri, to_extras],
'frequency': [optional, is_string, overheid_frequency, to_extras],
'provenance': [optional, is_list, is_uri, to_extras],
'sample': [optional, is_list, is_uri, to_extras],
'changetype': [adms_changetype, to_extras],
'__after': [spatial_validation,
temporal_validation,
contact_point_validation,
......@@ -105,21 +103,21 @@ def create_schema(schema):
'url': [mandatory, is_string],
'name': [mandatory, is_string],
'description': [mandatory, is_string],
'metadata_language': [mandatory, donl_language],
'language': [mandatory, donl_languages, to_curly_string_if_list],
'metadata_language': [mandatory, is_string, donl_language],
'language': [mandatory, is_list, donl_language],
'license': [mandatory, overheid_license],
'format': [mandatory, mdr_filetype_nal],
'format': [mandatory, is_string, mdr_filetype_nal],
'size': [recommended, is_number],
'download_url': [recommended, are_uris, to_curly_string_if_list],
'mimetype': [recommended, iana_mediatypes],
'release_date': [recommended, is_date],
'download_url': [recommended, is_string, is_uri],
'mimetype': [recommended, is_string, iana_mediatypes],
'release_date': [recommended, is_string, is_date],
'rights': [recommended, is_string],
'status': [recommended, adms_distributiestatus],
'modification_date': [recommended, is_date],
'linked_schemas': [recommended, is_list, to_curly_string_if_list],
'status': [recommended, is_string, adms_distributiestatus],
'modification_date': [recommended, is_string, is_date],
'linked_schemas': [recommended, is_list, is_uri],
'hash': [optional, is_string],
'hash_algorithm': [optional, is_string],
'documentation': [optional, are_uris, to_curly_string_if_list],
'documentation': [optional, is_list, is_uri],
'__after': [hash_validation]
})
......
......@@ -18,10 +18,10 @@ def valid_hash(key, data, errors, context):
hash_key = ('resources', key[1], 'hash')
hash_algorithm = ('resources', key[1], 'hash_algorithm')
if hash_key in data and not data[hash_key] == '' and not hash_algorithm in data:
if hash_key in data and not data[hash_key] == '' and hash_algorithm not in data:
errors[hash_algorithm].append('when hash is provided, hash_algorithm must too be provided')
if hash_algorithm in data and not hash_key in data:
if hash_algorithm in data and hash_key not in data:
errors[hash_key].append('when hash_algorithm is provided, hash must too be provided')
return key, data, errors, context
......@@ -5,28 +5,40 @@ Module that provides the validation methods required to validate temporal proper
"""
from datetime import datetime
import ckan.plugins.toolkit as tk
_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
def is_date(value):
def is_date(key, data, errors, context):
"""
Checks if a given string can be parsed as a datetime object with the following pattern:
YYYY-MM-DDTHH:MM:SS. Will throw a ckan.plugins.toolkit.Invalid exception when it cannot.
:param value: string, the value to check
:return: string, the original value
:param key:
:param data:
:param errors:
:param context:
:return:
"""
import ckanext.dcatdonl.validator.validator_factory as validator_factory
value = data[key]
validator_factory.get_validator('is_string')(value)
if not errors[key]:
errors[key] = []
if isinstance(value, list):
for date_time in value:
try:
datetime.strptime(date_time, _DATETIME_FORMAT)
except ValueError:
errors[key].append('value must be a valid date (yyyy-mm-ddThh:mm:ss)')
return key, data, errors, context
try:
datetime.strptime(value, _DATETIME_FORMAT)
except ValueError:
raise tk.Invalid('value must be a valid date (yyyy-mm-ddThh:mm:ss)')
errors[key].append('value must be a valid date (yyyy-mm-ddThh:mm:ss)')
return value
......@@ -41,10 +53,7 @@ def valid_temporal(key, data, errors, context):
:param context: dict, argument to match CKAN method signature, not used
:return: (key, data, errors, context), the original arguments, possibly modified
"""
properties = [
('temporal_start',),
('temporal_end',)
]
properties = [('temporal_start',), ('temporal_end',)]
properties_present = (prop in data for prop in properties)
errors_present = (prop in errors for prop in properties)
error_message = 'temporal_start cannot be greater or equal to temporal_end'
......
......@@ -5,42 +5,44 @@ Module that provides validation methods for http uris.
"""
from urlparse import urlparse
import ckan.plugins.toolkit as tk
def is_uri(value):
def is_uri(key, data, errors, context):
"""
Checks if a given value is a valid URI. Will throw a ckan.plugins.toolkit.Invalid exception when
it cannot.
:param value: string, the value to check
:return: string, the original value
:param key:
:param data:
:param errors:
:param context:
:return:
"""
import ckanext.dcatdonl.validator.validator_factory as validator_factory
value = data[key]
validator_factory.get_validator('is_string')(value)
if not errors[key]:
errors[key] = []
parsed = urlparse(value)
if isinstance(value, list):
for uri in value:
if not _valid_uri(uri):
errors[key].append('value ' + uri + 'is not a valid uri')
if not all([parsed.scheme, parsed.netloc, parsed.path]):
raise tk.Invalid('value is not a valid uri')
return key, data, errors, context
return value
if not _valid_uri(value):
errors[key].append('value ' + value + 'is not a valid uri')
return key, data, errors, context
def are_uris(values):
"""
Checks if a given value is a list of URIs. Will throw a ckan.plugins.toolkit.Invalid exception
when it is not, or when the list contains invalid URIs.
:param values: list, a list of values to check
:return: list, the original list
def _valid_uri(uri):
"""
import ckanext.dcatdonl.validator.validator_factory as validator_factory
validator_factory.get_validator('is_list')(values)
Validates a given uri
for uri in values:
is_uri(uri)
:param uri:
:return:
"""
parsed = urlparse(uri)
return values
return all([parsed.scheme, parsed.netloc, parsed.path])
......@@ -26,7 +26,6 @@ _VALIDATORS = {
'is_date': temporal_validator.is_date,
'is_list': type_validator.is_list,
'is_dictionary': type_validator.is_dictionary,
'are_uris': uri_validator.are_uris,
'adms_changetype_created': changetype_validator.changetype_created,
'adms_changetype_updated': changetype_validator.changetype_updated,
'spatial_validation': spatial_validator.valid_spatial,
......@@ -50,7 +49,6 @@ _VALIDATORS = {
'overheid_spatial_postcodehuisnummer': spatial_validator.valid_postcodehuisnummer,
'donl_catalogs': valuelist_validator.donl_catalogs,
'donl_language': valuelist_validator.donl_language,
'donl_languages': valuelist_validator.donl_languages,
'donl_organization': valuelist_validator.donl_organization,
'mdr_filetype_nal': valuelist_validator.mdr_filetype_nal,
'iana_mediatypes': valuelist_validator.iana_mediatypes,
......
......@@ -29,7 +29,7 @@ def overheid_taxonomiebeleidsagenda(values):
:param values: list, the value to validate
:return: list, the original values, if they pass validation
"""
return _check_values('overheid_taxonomiebeleidsagenda', values, 1, 0)
return _check_value('overheid_taxonomiebeleidsagenda', values)
def overheid_openbaarheidsniveau(value):
......@@ -81,7 +81,7 @@ def overheid_spatial_scheme(value):
:param value: str, the value to validate
:return: str, the original value if it passes validation
"""
return _check_values('overheid_spatial_scheme', value, 1, 0, False)
return _check_value('overheid_spatial_scheme', value, 1, 0, False)
def overheid_spatial_gemeente(value):
......@@ -162,17 +162,6 @@ def donl_language(values):
return _check_value('donl_language', values)
def donl_languages(values):
    """
    Checks a list of values against the donl_language valuelist; at least one value is required
    and no upper bound is imposed. The check itself is delegated to _check_values, which raises
    a ckan.plugins.toolkit.Invalid when the values fail validation.

    :param values: list, the values to validate
    :return: list, the original values, if they pass validation
    """
    valuelist_name = 'donl_language'

    return _check_values(valuelist_name, values, 1, 0)
def donl_organization(values):
"""
Validates value against the donl_organization valuelist. Will throw a
......@@ -236,15 +225,14 @@ def _check_value(name, value):
:param value: str, the value to check
:return: str, the original value if it passes validation
"""
if isinstance(value, list):
raise tk.Invalid('value must be a string')
valid_values = _read_file_as_json(name)
# SOLR stores lists as {data,..}, these should be converted to actual lists before reaching this
# code
if value.startswith('{') and value.endswith('}'):
raise tk.Invalid('value must be a string')
if isinstance(value, list):
for val in value:
if val not in valid_values:
raise tk.Invalid('value [' + val + '] is not a valid ' + name)
valid_values = _read_file_as_json(name)
return value
if value not in valid_values:
raise tk.Invalid('value [' + value + '] is not a valid ' + name)
......@@ -252,39 +240,6 @@ def _check_value(name, value):
return value
def _check_values(name, values, minimum=1, maximum=0, unique_check=True):
    """
    Validates a list of values against the valuelist with the given name. A
    ckan.plugins.toolkit.Invalid is raised as soon as one of the checks fails.

    :param name: str, the name of the valuelist
    :param values: list, the values to check
    :param minimum: int, the minimum length of the values list
    :param maximum: int, the maximum length of the values list, or 0 if there is no maximum
    :param unique_check: bool, whether duplicate values should be rejected
    :return: list, the original list of values if it passes validation
    """
    if isinstance(values, basestring):
        raise tk.Invalid('value must be a list')

    if len(values) < minimum:
        raise tk.Invalid('values do not meet the minimum requirements')

    # a maximum of 0 (or less) means that the length is unbounded
    if maximum > 0 and len(values) > maximum:
        raise tk.Invalid('values do not meet the maximum requirements')

    if unique_check and len(values) != len(set(values)):
        raise tk.Invalid('values must be unique')

    allowed = _read_file_as_json(name)

    for candidate in values:
        if candidate not in allowed:
            raise tk.Invalid('value [' + candidate + '] is not a valid ' + name)

    return values
@cached
def _check_license_file(value):
"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment