Commit 047b50b9 authored by Willem ter Berg

refactored to include wisdom from intellegunt and dataoverheid implementations

parent 2f44bd95
# Travis CI configuration: builds and tests the project on Python 2.7.
language: python
# The build requests an environment in which sudo is available.
sudo: required
python:
- "2.7"
# PostgreSQL version exported to the build as the PGVERSION environment variable.
env: PGVERSION=9.1
# Install step: run the build script, then install the coveralls client.
install:
- bash bin/travis-build.bash
- pip install coveralls
# Test step.
script: sh bin/travis-run.sh
# Runs only when the test step succeeded.
after_success:
- coveralls
This diff is collapsed.
# Files to ship in the source distribution in addition to the Python modules.
include README.md
include LICENSE
include requirements.txt
# Non-Python assets (templates, JSON data, scripts, stylesheets, compiled translations)
# found anywhere below the extension package.
recursive-include ckanext/dcatdonl *.html *.json *.js *.less *.css *.mo
\ No newline at end of file
#!/bin/bash
# Build script for the CI install step: installs the system packages, clones and installs
# CKAN, prepares the PostgreSQL database and Solr settings, and finally installs this
# extension together with its development requirements.

# Abort as soon as any command exits with a non-zero status.
set -e
echo "This is travis-build.bash..."
echo "Installing the packages that CKAN requires..."
sudo apt-get update -qq
# PGVERSION must be provided through the environment.
sudo apt-get install postgresql-$PGVERSION solr-jetty libcommons-fileupload-java:amd64=1.2.2-1
echo "Installing CKAN and its Python dependencies..."
git clone https://github.com/ckan/ckan
cd ckan
# Pick a release branch: list all branches, keep the remote release-v* ones, sort them in
# reverse order, strip the 'remotes/origin/' prefix and take the first entry.
# NOTE(review): `sort -r` compares lexically, so a two-digit minor version such as
# release-v2.10 would sort below release-v2.9 -- confirm this selects the intended branch.
export latest_ckan_release_branch=`git branch --all | grep remotes/origin/release-v | sort -r | sed 's/remotes\/origin\///g' | head -n 1`
echo "CKAN branch: $latest_ckan_release_branch"
git checkout $latest_ckan_release_branch
python setup.py develop
pip install -r requirements.txt --allow-all-external
pip install -r dev-requirements.txt --allow-all-external
# Return to the directory the script was started from.
cd -
echo "Creating the PostgreSQL user and database..."
sudo -u postgres psql -c "CREATE USER ckan_default WITH PASSWORD 'pass';"
sudo -u postgres psql -c 'CREATE DATABASE ckan_test WITH OWNER ckan_default;'
echo "SOLR config..."
# Solr is multicore for tests on ckan master, but it's easier to run tests on
# Travis single-core. See https://github.com/ckan/ckan/issues/2972
# Rewrite the solr_url setting of CKAN's test configuration in place.
sed -i -e 's/solr_url.*/solr_url = http:\/\/127.0.0.1:8983\/solr/' ckan/test-core.ini
echo "Initialising the database..."
cd ckan
paster db init -c test-core.ini
cd -
echo "Installing ckanext-dcatdonl and its requirements..."
# Executed from the directory the script was started in, after the `cd -` above.
python setup.py develop
pip install -r dev-requirements.txt
echo "Moving test.ini into a subdir..."
# NOTE(review): presumably done so that relative paths inside test.ini resolve from one
# directory deeper -- confirm against the contents of test.ini.
mkdir subdir
mv test.ini subdir
echo "travis-build.bash is done."
\ No newline at end of file
#!/bin/sh -e
# Test script for the CI run step: configures and restarts Jetty with CKAN's Solr schema
# and then runs the test suite of the extension with coverage reporting.

# Write the Jetty defaults file (overwriting it) so Jetty listens on 127.0.0.1:8983.
# NOTE(review): the "\n" sequences only become newlines when the shell's echo interprets
# backslash escapes -- confirm that the `sh` on the build machine does so.
echo "NO_START=0\nJETTY_HOST=127.0.0.1\nJETTY_PORT=8983\nJAVA_HOME=$JAVA_HOME" | sudo tee /etc/default/jetty
# Replace the Solr schema with the one from the CKAN checkout in ./ckan.
sudo cp ckan/ckan/config/solr/schema.xml /etc/solr/conf/schema.xml
sudo service jetty restart
# Run the tests against subdir/test.ini and measure coverage of ckanext.dcatdonl only.
nosetests --ckan \
--nologcapture \
--with-pylons=subdir/test.ini \
--with-coverage \
--cover-package=ckanext.dcatdonl \
--cover-inclusive \
--cover-erase \
--cover-tests
# encoding: utf-8
"""
Module that applies the DCAT-AP-DONL schema and its validation methods to the CKAN installation.
"""
import ckan.plugins as plugins
import ckan.plugins.toolkit as tk
import ckanext.dcatdonl.validator.validator_factory as validator_factory
import ckanext.dcatdonl.schema.schema_factory as schema_factory
class DONLPlugin(plugins.SingletonPlugin, tk.DefaultDatasetForm):
    """
    CKAN plug-in that registers the DCAT-AP-DONL package schemas and the custom validators, and
    that prepares multivalued properties before a package is indexed.
    """
    plugins.implements(plugins.IDatasetForm)
    plugins.implements(plugins.IValidators)
    plugins.implements(plugins.IPackageController, inherit=True)

    # IValidators

    def get_validators(self):
        """
        Method that exposes custom made validators towards other parts of CKAN.

        :return: dict, a dictionary with key/values pointing towards new validator methods
        """
        return validator_factory.get_all_validators()

    # IDatasetForm

    def is_fallback(self):
        """
        Method that indicates if this plug-in acts as the fallback plug-in.

        :return: bool, A boolean indicating if this plug-in acts as the fallback plug-in
        """
        return True

    def package_types(self):
        """
        Method that indicates which package types this plug-in supports. Will return an empty list
        to indicate that this plug-in supports all package types.

        :return: list, a list of supported package types
        """
        return []

    def create_package_schema(self):
        """
        Updates the CKAN schema that will be used when creating a new dataset.

        :return: dict, the updated CKAN schema
        """
        schema = super(DONLPlugin, self).create_package_schema()
        return schema_factory.get_schema('DCAT-AP-DONL').create_schema(schema)

    def update_package_schema(self):
        """
        Updates the CKAN schema that will be used when updating an existing dataset.

        :return: dict, the updated CKAN schema
        """
        schema = super(DONLPlugin, self).update_package_schema()
        return schema_factory.get_schema('DCAT-AP-DONL').update_schema(schema)

    def show_package_schema(self):
        """
        Transforms the saved CKAN datasets into a more presentable form.

        :return: dict, the updated CKAN schema
        """
        schema = super(DONLPlugin, self).show_package_schema()
        return schema_factory.get_schema('DCAT-AP-DONL').show_schema(schema)

    # IPackageController

    def before_index(self, package_dict):
        """
        Ensures that the multivalued properties will be sent as lists to SOLR. Properties that
        are absent from the package or from a resource are left untouched.

        :param package_dict: dict, the original dictionary of the package
        :return: dict, the original, possibly modified, dictionary of the package
        """
        multivalued_properties = ['alternate_identifier', 'conforms_to', 'related_resource',
                                  'source', 'version_notes', 'has_version', 'is_version_of',
                                  'provenance', 'documentation', 'sample', 'theme',
                                  'spatial_scheme', 'spatial_value']
        for prop in multivalued_properties:
            if prop in package_dict:
                package_dict[prop] = self._split_multivalued(package_dict[prop])

        resource_multivalued = ['language', 'download_url', 'linked_schemas', 'documentation']
        # Every resource is handled on its own: a resource that lacks one of the properties
        # must not prevent the resources after it from being converted.
        for resource in package_dict.get('resources', []):
            for prop in resource_multivalued:
                if prop in resource:
                    resource[prop] = self._split_multivalued(resource[prop])

        return package_dict

    @staticmethod
    def _split_multivalued(value):
        """
        Strips all curly brackets from the given string and splits the remainder on commas.

        :param value: str, the string representation of a multivalued property
        :return: list, the individual values
        """
        return value.replace('{', '').replace('}', '').split(',')
# encoding: utf-8
"""
Module that provides an update mechanism for the various vocabularies defined in valuelists.
"""
import os
import logging
import urllib2
from time import sleep
# Configure the root logger at import time: messages of level INFO and above are written to
# log/ValuelistUpdater.log, resolved relative to the directory of this module.
# NOTE(review): presumably the log directory has to exist beforehand -- confirm.
logging.basicConfig(
    filename=os.path.join(os.path.dirname(__file__), './log/ValuelistUpdater.log'),
    level=logging.INFO,
    format='%(asctime)s \t %(levelname)s \t %(message)s'
)
_VALUELISTS = {
'adms_changetype': 'https://waardelijsten.dcat-ap-donl.nl/adms_changetype.json',
'adms_distributiestatus': 'https://waardelijsten.dcat-ap-donl.nl/adms_distributiestatus.json',
'donl_catalogs': 'https://waardelijsten.dcat-ap-donl.nl/donl_catalogs.json',
'donl_language': 'https://waardelijsten.dcat-ap-donl.nl/donl_language.json',
'donl_organization': 'https://waardelijsten.dcat-ap-donl.nl/donl_organization.json',
'iana_mediatypes': 'https://waardelijsten.dcat-ap-donl.nl/iana_mediatypes.json',
'mdr_filetype_nal': 'https://waardelijsten.dcat-ap-donl.nl/mdr_filetype_nal.json',
'overheid_dataset_status': 'https://waardelijsten.dcat-ap-donl.nl/overheid_dataset_status.json',
'overheid_frequency': 'https://waardelijsten.dcat-ap-donl.nl/overheid_frequency.json',
'overheid_license': 'https://waardelijsten.dcat-ap-donl.nl/overheid_license.json',
'overheid_openbaarheidsniveau': 'https://waardelijsten.dcat-ap-donl.nl/overheid_openbaarheidsniveau.json',
'overheid_spatial_scheme': 'https://waardelijsten.dcat-ap-donl.nl/overheid_spatial_scheme.json',
'overheid_spatial_gemeente': 'https://waardelijsten.dcat-ap-donl.nl/overheid_spatial_gemeente.json',
'overheid_spatial_koninkrijksdeel': 'https://waardelijsten.dcat-ap-donl.nl/overheid_spatial_koninkrijksdeel.json',
'overheid_spatial_provincie': 'https://waardelijsten.dcat-ap-donl.nl/overheid_spatial_provincie.json',
'overheid_spatial_waterschap': 'https://waardelijsten.dcat-ap-donl.nl/overheid_spatial_waterschap.json',
'overheid_taxonomiebeleidsagenda': 'https://waardelijsten.dcat-ap-donl.nl/overheid_taxonomiebeleidsagenda.json'
}
def update_valuelist(name, online_resource):
    """
    Updates a specific valuelist based on the specified online resource. The download is read
    completely before the local file is opened, so a failed download leaves the existing local
    valuelist untouched. Failures are logged and never raised to the caller.

    :param name: str, the name of the valuelist
    :param online_resource: str, the url of the online resource
    :return: void
    """
    try:
        resource = urllib2.urlopen(online_resource)
        try:
            contents = resource.read()
        finally:
            # Always release the connection, also when reading the response fails.
            resource.close()

        # Opening with 'wb' truncates the file, so it only happens once the data is in memory.
        with open(_create_file_path(name), 'wb') as local_source:
            local_source.write(contents)

        logging.info('Successfully updated valuelist [%s].', name)
    except urllib2.HTTPError:
        # HTTPError is handled before URLError because it is a subclass of URLError.
        logging.error('Failed to update valuelist [%s] from resource [%s]. '
                      'The online resource could not be reached.', name, online_resource)
    except urllib2.URLError:
        logging.error('Failed to update valuelist [%s] from resource [%s]. '
                      'The url appears to be invalid.', name, online_resource)
    except Exception:
        # Best-effort update: record the unexpected failure including its traceback.
        logging.exception(
            'Failed to update valuelist [%s] from resource [%s].', name, online_resource)
def _create_file_path(filename):
"""
Creates the absolute filepath to the given filename.
:param filename: str, the name of the file
:return: str, the absolute path to the given file
"""
return os.path.join(os.path.dirname(__file__), './resources/' + filename + '.json')
if __name__ == '__main__':
    # Script entry point: refresh every configured valuelist, one after another.
    logging.info('ValuelistUpdater.py started')
    for valuelist_name, resource_url in _VALUELISTS.items():
        logging.info('Updating [%s] from [%s]', valuelist_name, resource_url)
        update_valuelist(valuelist_name, resource_url)
        # Wait two seconds after each download before starting the next one.
        sleep(2)
    logging.info('ValuelistUpdater finished.')
# encoding: utf-8
"""
Exposes value conversion methods that perform operations on specified keys in dictionaries.
"""
import property_conversion
import default_converter
from property_conversion import remove_properties
# Registry of the converter functions of this package, keyed by the name under which each
# converter is exposed.
_CONVERTERS = {
    'convert_string_to_list': property_conversion.convert_string_to_list,
    'convert_list_to_string': property_conversion.convert_list_to_string,
    'default_conversion': default_converter.default
}
def get_all():
    """
    Returns the registry of converter methods defined by this package. The module-level
    dictionary itself is returned, not a copy.

    :return: dict, A dictionary mapping converter names to the converter methods
    """
    return _CONVERTERS
# encoding: utf-8
"""
Exposes functionality that allows default values to be set for fields in the package dictionary.
"""
def default(default_value, force=False):
    """
    Builds a function that substitutes a default value for a field.

    :param default_value: The value to fall back to
    :param force: When truthy, the default value always replaces the original value
    :return: function, A function that takes the original value and returns the value to use
    """
    def default_setter(value):
        """
        Returns the default value when the original value is falsy or when the default is
        forced; returns the original value otherwise.

        :param value: The original value
        :return: The original value or the default value
        """
        if not value or force:
            return default_value
        return value

    return default_setter
# encoding: utf-8
"""
Exposes functionality that certain data types to be converted to other data types given certain
conditions.
"""
from ckanext.dcatdonl.helper import get_properties_to_remove, get_resource_properties_to_remove
def convert_string_to_list(key, data, errors, context):
    """
    Converts a given property to a list, assuming the property has a value and that the value is
    currently a string surrounded with curly brackets. Double quotes are removed and the text
    between the brackets is split on commas. A pair of brackets without content ('{}') is
    converted to an empty list. Values that do not match are left untouched.

    :param key: The key of the property
    :param data: The dictionary containing the property
    :param errors: The dictionary containing the validation errors of the data dictionary
    :param context: The CKAN context of the current execution
    :return: tuple, dict, dict, dict The original, possibly modified, arguments
    """
    # On Python 2 `bytes` is `str` and `type(u'')` is `unicode`, which together cover
    # every basestring value.
    text_types = (bytes, type(u''))

    value = data.get(key, None)
    if not value or not isinstance(value, text_types):
        return key, data, errors, context
    if not (value.startswith('{') and value.endswith('}')):
        return key, data, errors, context

    inner = value.replace('"', '')[1:-1]
    # ''.split(',') would produce [''], a list holding one empty value, rather than no values.
    data[key] = inner.split(',') if inner else []
    return key, data, errors, context
def convert_list_to_string(key, data, errors, context):
    """
    Converts a given property to a string, assuming the property has a value and that the value is
    currently a list. The items are joined with commas and the converted string will be surrounded
    by curly brackets. Empty lists and values that are not a list are left untouched.

    :param key: The key of the property
    :param data: The dictionary containing the property
    :param errors: The dictionary containing the validation errors of the data dictionary
    :param context: The CKAN context of the current execution
    :return: tuple, dict, dict, dict The original, possibly modified, arguments
    """
    value = data.get(key, None)
    if not value or not isinstance(value, list):
        return key, data, errors, context

    # On Python 2 `bytes` is `str` and `type(u'')` is `unicode`. Text items are joined as they
    # are, because str() raises a UnicodeEncodeError for unicode text with non-ASCII characters.
    text_types = (bytes, type(u''))
    items = [item if isinstance(item, text_types) else str(item) for item in value]
    data[key] = '{' + ','.join(items) + '}'
    return key, data, errors, context
def remove_properties(data_dict):
    """
    Strips unwanted keys from a dictionary, in place. The keys returned by
    get_properties_to_remove() are dropped from the dictionary itself; the keys returned by
    get_resource_properties_to_remove() are dropped from every entry of its 'resources' list,
    when that list is present. Keys that are absent are ignored.

    :param data_dict: The dictionary to remove the keys from
    :return: The, possibly modified, original dictionary
    """
    for unwanted_key in get_properties_to_remove():
        data_dict.pop(unwanted_key, None)

    for resource_dict in data_dict.get('resources', []):
        for unwanted_key in get_resource_properties_to_remove():
            resource_dict.pop(unwanted_key, None)

    return data_dict
# encoding: utf-8
"""
Exposes functionality which assists other parts of the ckanext-dcatdonl extension with their
operations.
"""
from dictionary_merger import merge_dictionaries
from config import get_controlled_vocabularies
from config import get_properties_to_remove
from config import get_resource_properties_to_remove
from config import get_non_open_licenses
from config import in_debug_mode
from caching import cached
from multivalued_transformation import transform_multivalued_properties
from logger import log
# encoding: utf-8
"""
Module that declares a caching decorator. The cache is invalidated and rebuild on the first request
on a new day. Therefor a cached value expires after a maximum of 24 hours.
Provides method caching features.
"""
from datetime import datetime
......@@ -14,18 +14,19 @@ _CACHE_STORE = {}
def cached(cached_function):
"""
Provides a caching mechanism for function invocations.
Provides a caching annotation for methods to use. The results of methods are cached for up to 24
hours. The first invocation of any given day will invalidate the cache and rebuild the cache
based on a new actual execution of the method.
:param cached_function: function, the function output to cache
:return: the response of the function call
:param cached_function: The method to perform the caching on
:return: function, Returns a caching function
"""
def function_wrapper(*args):
"""
Checks if a given function invocation is cached, if it is, returns the cached result,
otherwise it computes the result, caches is and then returns that result.
Performs caching on the results of a method invocation with the given arguments.
:param args: list, the function arguments
:return: ?, the response of the function call
:param args: The arguments given to the method
:return: The cached result of the method invocation
"""
_current_date = int(datetime.strftime(datetime.now(), '%Y%m%d'))
......
# encoding: utf-8
"""
Reads and returns values defined in the config.json file located in the root of the
ckanext-dcatdonl extension.
"""
import os
import json
from caching import cached
def get_controlled_vocabularies():
    """
    Returns the value stored under the 'controlled_vocabularies' key of the config.json file:
    a list in which every dictionary describes one controlled vocabulary through the keys
    'name', 'local' and 'online'. The 'local' key holds the filename of the vocabulary inside
    the ckanext/dcatdonl/resources/controlled_vocabularies directory, the 'online' key holds
    the URL of the online version of the vocabulary.

    :return: list, A list of dictionaries containing controlled vocabularies
    """
    return _load_config_file()['controlled_vocabularies']
def get_properties_to_remove():
    """
    Returns the value stored under the 'properties_to_remove' key of the config.json file: the
    properties to strip from the CKAN package dictionary before it is returned to the end-user.

    :return: list, A list of strings containing the properties to remove
    """
    return _load_config_file()['properties_to_remove']
def get_resource_properties_to_remove():
    """
    Returns the value stored under the 'resource_properties_to_remove' key of the config.json
    file: the properties to strip from the CKAN resource dictionary before it is returned to
    the end-user.

    :return: list, A list of strings containing the properties to remove
    """
    return _load_config_file()['resource_properties_to_remove']
def get_non_open_licenses():
    """
    Returns the value stored under the 'non_open_licenses' key of the config.json file: the
    licenses which are considered 'non-open'.

    :return: list, A list of license values which are considered 'non-open'
    """
    return _load_config_file()['non_open_licenses']
def in_debug_mode():
    """
    Returns the value stored under the 'debug' key of the config.json file, which tells
    whether or not the ckanext-dcatdonl extension is running in debug mode.

    :return: bool, True or false depending on the mode
    """
    return _load_config_file()['debug']
@cached
def _load_config_file():
    """
    Parses the config.json file that is located three directories above the directory of this
    module. The function is wrapped by the `cached` decorator, so the parsed result is reused
    between invocations.

    :return: dict, The contents of the config.json file
    """
    module_directory = os.path.dirname(__file__)
    filepath = os.path.join(module_directory, '..', '..', '..', 'config.json')
    with open(filepath, 'r') as handle:
        return json.loads(handle.read())
# encoding: utf-8
"""
Exposes functionality which allows dictionaries to be merged.
"""
def merge_dictionaries(first, second):
    """
    Combines two dictionaries into a new one without modifying either of them. When a key is
    present in both dictionaries, the value of the second dictionary is used.

    :param first: dict, The dictionary that provides the initial contents
    :param second: dict, The dictionary whose entries are added on top of the first
    :return: dict, The merged dictionary
    """
    # Work on a shallow copy so the first dictionary stays as it was.
    combined = first.copy()
    combined.update(second)
    return combined
# encoding: utf-8
"""
Exposes logging functionality to the ckanext-dcatdonl extension.
"""
import logging
from ckanext.dcatdonl.helper import in_debug_mode
# Logger shared by the whole extension.
logger = logging.getLogger('ckanext.dcatdonl')


def log(message):
    """
    Writes the given message to the extension logger at INFO level, but only while the
    extension is running in debug mode. Outside of debug mode the message is discarded.

    :param message: string, The message to log
    :return: void
    """
    if not in_debug_mode():
        return
    logger.info(message)
# encoding: utf-8
"""
Exposes functionality which ensures that multivalued properties are correctly send to the Solr
installation.
"""
def transform_multivalued_properties(data_dict):
"""
Performs several transformations on properties.
- all multivalued properties are converted from strings to lists
- all datetime properties are transformed to the Solr datetime format
:param data_dict: dict, The original dictionary
:return: dict, The modified dictionary
"""
for prop in ['alternate_identifier', 'conforms_to', 'related_resource', 'source',
'version_notes', 'has_version', 'is_version_of', 'provenance', 'documentation',
'sample', 'theme', 'spatial_scheme', 'spatial_value', 'language']:
try:
data_dict[prop] = data_dict[prop].replace('{', '').replace('}', '').split(',')
except KeyError:
continue
for prop in ['temporal_start', 'temporal_end', 'date_planned', 'issued', 'modified']:
try:
data_dict[prop] = u'{}Z'.format(data_dict[prop])
except KeyError:
continue
for prop in ['language', 'download_url', 'linked_schemas', 'documentation']:
try:
for resource in data_dict['resources']:
resource[prop] = resource[prop].replace('{', '').replace('}', '')
except KeyError:
continue
for prop in ['release_date', 'modification_date']: