2017-09-30 04:44:59 +00:00
|
|
|
#
|
|
|
|
# S. Van Hoey
|
|
|
|
#
|
|
|
|
# Build script for tdwg dwc handling
|
|
|
|
#
|
|
|
|
|
|
|
|
import io
|
|
|
|
import csv
|
2017-09-30 15:13:14 +00:00
|
|
|
import sys
|
2017-09-30 04:44:59 +00:00
|
|
|
|
|
|
|
from urllib import request
|
|
|
|
|
2017-09-30 14:49:25 +00:00
|
|
|
from Cheetah.Template import Template
|
|
|
|
|
2017-09-30 04:44:59 +00:00
|
|
|
|
|
|
|
class ProvidedTermsError(Exception):
    """Signal that the provided sets of terms are inconsistent with each other."""
|
|
|
|
|
2017-09-30 14:48:27 +00:00
|
|
|
class RdfTypeError(Exception):
    """Signal that an rdf type was encountered which the builder does not know."""
|
|
|
|
|
2017-09-30 04:44:59 +00:00
|
|
|
|
|
|
|
class DwcBuildReader:
    """Context manager handing out a binary stream for a build input file.

    When the location contains ``https://raw.github`` the content is fetched
    with ``urllib.request.urlopen``; any other location is opened as a local
    file in binary mode.
    """

    def __init__(self, dwc_build_file):
        """Remember the location (raw GitHub URL or local path) to read from."""
        self.dwc_build_file = dwc_build_file

    def __enter__(self):
        """Open the configured location and return the open binary handle."""
        location = self.dwc_build_file
        is_remote = "https://raw.github" in location
        # remote content is streamed through urllib, everything else is read
        # from disk as bytes
        handle = request.urlopen(location) if is_remote else open(location, 'rb')
        self.open_dwc_term = handle
        return handle

    def __exit__(self, *args):
        """Close the handle that ``__enter__`` opened."""
        self.open_dwc_term.close()
|
|
|
|
|
|
|
|
|
|
|
|
class DwcDigester(object):
    """Combine the term versions file and the terms config file.

    Both inputs are CSV files read through ``DwcBuildReader``. Their rows are
    indexed by the ``term_iri`` column and merged into the nested structure
    that is handed to the HTML template.
    """

    def __init__(self, term_versions, terms_config):
        """Load both input files and verify that they describe the same terms.

        Parameters
        ----------
        term_versions : str
            Location of the term versions CSV (passed to ``DwcBuildReader``).
        terms_config : str
            Location of the terms config CSV (passed to ``DwcBuildReader``).

        Raises
        ------
        ProvidedTermsError
            When a term is present in only one of the two files.
        """
        self.term_versions = term_versions
        self.terms_config = terms_config

        self.term_versions_data = {}
        self._store_versions()
        self.terms_config_data = {}
        self._store_config()

        # check for the ability to combine the data
        self.match_error_report()

    @staticmethod
    def _dict_rows(binary_handle):
        """Return a ``csv.DictReader`` over a binary handle, decoded as UTF-8.

        The encoding is passed explicitly so parsing does not depend on the
        platform locale; ``newline=""`` leaves line-ending handling to the csv
        module, as the csv documentation recommends.
        """
        text_handle = io.TextIOWrapper(binary_handle, encoding="utf-8", newline="")
        return csv.DictReader(text_handle, delimiter=',')

    def versions(self):
        """Iterate over the rows of the term versions file.

        Only rows whose ``status`` column equals ``"recommended"`` are yielded.
        """
        with DwcBuildReader(self.term_versions) as versions:
            for vterm in self._dict_rows(versions):
                if vterm["status"] == "recommended":
                    yield vterm

    def config(self):
        """Iterate over the rows of the terms config file, in file order."""
        with DwcBuildReader(self.terms_config) as configs:
            for cfterm in self._dict_rows(configs):
                yield cfterm

    def _store_versions(self):
        """Collect all the versions rows in a dictionary keyed by ``term_iri``."""
        for term in self.versions():
            self.term_versions_data[term["term_iri"]] = term

    def _store_config(self):
        """Collect all the config rows in a dictionary keyed by ``term_iri``."""
        for term in self.config():
            self.terms_config_data[term["term_iri"]] = term

    def _version_terms(self):
        """Return the set of term IRIs found in the term versions file."""
        return set(self.term_versions_data.keys())

    def _config_terms(self):
        """Return the set of term IRIs found in the terms config file."""
        return set(self.terms_config_data.keys())

    def _select_versions_term(self, term_iri):
        """Select a specific row of the versions data, using term_iri match.

        Raises ``KeyError`` when the IRI is not present.
        """
        return self.term_versions_data[term_iri]

    def _select_config_term(self, term_iri):
        """Select a specific row of the config data, using term_iri match.

        Raises ``KeyError`` when the IRI is not present.
        """
        return self.terms_config_data[term_iri]

    def match_error_report(self):
        """Check that the versions file and the config file list the same terms.

        Raises
        ------
        ProvidedTermsError
            When either file contains term IRIs missing from the other. The
            message lists the last path segment of each offending IRI, sorted
            so that the report is reproducible between runs.
        """
        version_terms = self._version_terms()
        config_terms = self._config_terms()
        overload_versionterms = version_terms - config_terms
        overload_configterms = config_terms - version_terms
        if overload_versionterms or overload_configterms:
            vs_terms = ", ".join(sorted(term.split("/")[-1]
                                        for term in overload_versionterms))
            cf_terms = ", ".join(sorted(term.split("/")[-1]
                                        for term in overload_configterms))
            raise ProvidedTermsError("".join(["Terms only in term_versions.csv: ", vs_terms,
                                              ". Terms only in terms_config.csv: ", cf_terms]))

    def get_term_definition(self, term):
        """Extract the information shown on the webpage for a single term.

        Parameters
        ----------
        term : dict
            A row of the terms config file; its ``term_iri``, ``organized_in``
            and ``comments`` entries are read.

        Returns
        -------
        dict
            Keys ``name``, ``iri``, ``label``, ``class``, ``definition``,
            ``comments`` and ``rdf_type``. ``label``, ``definition`` and
            ``rdf_type`` are taken from the matching versions row.

        Raises
        ------
        KeyError
            When the term IRI has no entry in the versions data.
        """
        cf_term = term
        vs_term = self._select_versions_term(term["term_iri"])
        term_iri = term['term_iri']

        term_data = {}
        # the name is the last path segment of the IRI
        term_data["name"] = term_iri.split("/")[-1]
        term_data["iri"] = term_iri
        term_data["label"] = vs_term['label']
        term_data["class"] = cf_term['organized_in']
        term_data["definition"] = vs_term['definition']
        term_data["comments"] = cf_term['comments']
        term_data["rdf_type"] = vs_term['rdf_type']
        return term_data

    def process_terms(self):
        """Parse the config terms towards the structure required for the HTML template.

        Returns
        -------
        list of dict
            One dict per group, in config file order. Each group has the keys
            produced by ``get_term_definition`` plus ``terms``, the list of
            term dicts belonging to that group. The first group is always the
            predefined "Record-level" group.
        """
        template_data = []
        # sequence matters in config and it starts with Record-level
        class_group = {}
        class_group["name"] = "Record-level"
        class_group["iri"] = None
        class_group["label"] = "Record-level"
        class_group["class"] = None
        class_group["definition"] = None
        class_group["comments"] = None
        class_group["rdf_type"] = None
        class_group["terms"] = []
        for term in self.config():  # sequence of the config file used as order
            term_data = self.get_term_definition(term)
            # new class encountered
            if term_data["rdf_type"] == "http://www.w3.org/2000/01/rdf-schema#Class":
                # store previous section in template_data
                template_data.append(class_group)
                # start new class group, the class term itself is the group
                class_group = term_data
                class_group["terms"] = []
            else:
                class_group['terms'].append(term_data)
        # save the last class to template_data
        template_data.append(class_group)
        return template_data

    @staticmethod
    def create_html(template_data, html_template="./config/index.tmpl",
                    html_output="../guides/index.html"):
        """Build the html page with the processed term info.

        Parameters
        ----------
        template_data : list
            Groups as returned by ``process_terms``; exposed to the template
            under the name ``groups``.
        html_template : str
            Path of the Cheetah template file.
        html_output : str
            Path of the html file to write (overwritten when it exists).
        """
        data = {"groups": template_data}
        html = Template(file=html_template, searchList=[data])

        # the context manager closes the file even when rendering or writing
        # fails; UTF-8 is explicit so the output does not depend on the locale
        with open(html_output, "w", encoding="utf-8") as index_page:
            index_page.write(str(html))
|
2017-09-30 15:13:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Run the complete build: digest both term files and write the html page."""
    versions_path = "../vocabulary/term_versions.csv"
    config_path = "./config/terms.csv"

    print("Running build process using current term_versions and config_terms file...")
    digester = DwcDigester(versions_path, config_path)

    print("Building index html file...")
    groups = digester.process_terms()
    digester.create_html(groups)

    print("...done!")
|
|
|
|
|
|
|
|
|
|
|
|
# Run the build only when this file is executed as a script; the return value
# of main() is passed to sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|