mirror of https://github.com/tdwg/dwc.git
commit
62af4272c7
|
@ -1,16 +1,25 @@
|
|||
The build folder contains a Python script to rebuild the terms index page.
|
||||
# Build script
|
||||
|
||||
It will use the `vocabulary/term_versions.csv` and the `./config/terms_config.csv` to populate the html-template file (`./config/index.tmpl`)
|
||||
The build script `build.py` uses as input:
|
||||
|
||||
* [vocabulary/term_versions.csv](../vocabulary/term_versions.csv): the list of terms
|
||||
* [docs/_layouts/terms.tmpl](../docs/_layouts/terms.tmpl): a Jinja2 template for the quick reference guide
|
||||
|
||||
To create:
|
||||
|
||||
* The quick reference guide as a Markdown file at [docs/terms/index.md](../docs/terms/index.md). The guide is built as Markdown (with a lot of included html) rather than html, so it can be incorporated by Jekyll in the Darwin Core website (including a header, footer and table of contents).
|
||||
* Two simple Darwin Core CSV files in [dist/](../dist/)
|
||||
|
||||
## Run the build script
|
||||
Install the required libraries for this script with pip once:
|
||||
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
1. Install the required libraries (once):
|
||||
|
||||
To run the build script, execute it from the command line:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
```
|
||||
python build.py
|
||||
```
|
||||
2. Run the script from the command line:
|
||||
|
||||
```bash
|
||||
python build.py
|
||||
```
|
||||
|
|
134
build/build.py
134
build/build.py
|
@ -38,7 +38,7 @@ class DwcNamespaceError(Exception):
|
|||
class DwcBuildReader():
|
||||
|
||||
def __init__(self, dwc_build_file):
|
||||
"""Custom Reader switching between to raw Github or local file"""
|
||||
"""Custom Reader switching between raw Github or local file"""
|
||||
self.dwc_build_file = dwc_build_file
|
||||
|
||||
def __enter__(self):
|
||||
|
@ -54,118 +54,99 @@ class DwcBuildReader():
|
|||
|
||||
class DwcDigester(object):
|
||||
|
||||
def __init__(self, term_versions, terms_config):
|
||||
"""digest the normative document of Darwin Core and the configurations file to support automatic generation of derivatives
|
||||
def __init__(self, term_versions):
|
||||
"""Digest the term document of Darwin Core to support automatic
|
||||
generation of derivatives
|
||||
|
||||
Parameters
|
||||
-----------
|
||||
term_versions : str
|
||||
either a relative path and filename of the normative Dwc document or a URL link to the
|
||||
raw Github version of the file
|
||||
terms_config : str
|
||||
either a relative path and filename of the configurations file or a URL link to the
|
||||
raw Github version of the file
|
||||
Either a relative path and filename of the normative Dwc document
|
||||
or a URL link to the raw Github version of the file
|
||||
|
||||
Notes
|
||||
-----
|
||||
Remark that the sequence of the configurations file entries is essential for the automatic generation of the individual documents (mainly the index.html)
|
||||
Remark that the sequence of the term versions entries is
|
||||
essential for the automatic generation of the individual documents
|
||||
(mainly the index.html)
|
||||
"""
|
||||
self.term_versions = term_versions
|
||||
self.terms_config = terms_config
|
||||
|
||||
self.term_versions_data = {}
|
||||
self._store_versions()
|
||||
self.terms_config_data = {}
|
||||
self._store_configs()
|
||||
|
||||
# check for the ability to combine the data
|
||||
self.match_error_report()
|
||||
|
||||
# create the defined data-object for the different outputs
|
||||
self.template_data = self.process_terms()
|
||||
|
||||
def versions(self):
|
||||
"""iterator providing the terms as represented in the normative term versions file"""
|
||||
"""Iterator providing the terms as represented in the normative term
|
||||
versions file
|
||||
"""
|
||||
with DwcBuildReader(self.term_versions) as versions:
|
||||
for vterm in csv.DictReader(io.TextIOWrapper(versions), delimiter=','):
|
||||
if vterm["status"] == "recommended":
|
||||
yield vterm
|
||||
|
||||
def configs(self):
|
||||
"""iterator providing the terms as represented in the terms config file
|
||||
(taking into account the sequence)"""
|
||||
with DwcBuildReader(self.terms_config) as configs:
|
||||
for cfterm in csv.DictReader(io.TextIOWrapper(configs), delimiter=','):
|
||||
yield cfterm
|
||||
|
||||
def _store_versions(self):
|
||||
"""collect all the versions data in a dictionary as the term_versions_data attribute"""
|
||||
"""Collect all the versions data in a dictionary as the
|
||||
term_versions_data attribute
|
||||
"""
|
||||
for term in self.versions():
|
||||
self.term_versions_data[term["term_iri"]] = term
|
||||
|
||||
def _store_configs(self):
|
||||
"""collect all the config data in a dictionary as the terms_config_data attribute"""
|
||||
for term in self.configs():
|
||||
self.terms_config_data[term["term_iri"]] = term
|
||||
|
||||
@property
|
||||
def _version_terms(self):
|
||||
"""get an overview of the terms in the term_versions file"""
|
||||
"""Get an overview of the terms in the term_versions file
|
||||
"""
|
||||
return set(self.term_versions_data.keys())
|
||||
|
||||
@property
|
||||
def _config_terms(self):
|
||||
"""get an overview of the terms in the terms config file"""
|
||||
return set(self.terms_config_data.keys())
|
||||
|
||||
def _select_versions_term(self, term_iri):
|
||||
"""select a specific term of the versions data, using term_iri match"""
|
||||
"""Select a specific term of the versions data, using term_iri match
|
||||
"""
|
||||
return self.term_versions_data[term_iri]
|
||||
|
||||
def _select_config_term(self, term_iri):
|
||||
"""select a specific term of the config data, using term_iri match"""
|
||||
return self.terms_config_data[term_iri]
|
||||
|
||||
def match_error_report(self):
|
||||
"""check if the prime dwc file and the configurations file provide corresponding terms and inform user on the term differences in between both files"""
|
||||
overload_versionterms = self._version_terms - self._config_terms
|
||||
overload_configterms = self._config_terms - self._version_terms
|
||||
if len(overload_versionterms) > 0 or len(overload_configterms) > 0:
|
||||
vs_terms = ", ".join([term.split("/")[-1] for term in overload_versionterms])
|
||||
cf_terms = ", ".join([term.split("/")[-1] for term in overload_configterms])
|
||||
raise ProvidedTermsError("".join(["Terms only in term_versions.csv: ", vs_terms,
|
||||
". Terms only in terms_config.csv: ", cf_terms]))
|
||||
@staticmethod
|
||||
def split_iri(term_iri):
|
||||
"""split an iri field into the namespace url and the local name of the term"""
|
||||
"""Split an iri field into the namespace url and the local name
|
||||
of the term
|
||||
"""
|
||||
prog = re.compile("(.*/)([^/]*$)")
|
||||
namespace, local_name = prog.findall(term_iri)[0]
|
||||
return namespace, local_name
|
||||
|
||||
@staticmethod
|
||||
def resolve_namespace_abbrev(namespace):
|
||||
"""Using the NAMESPACE constant, get the namespace abbreviation by providing the namespace link"""
|
||||
"""Using the NAMESPACE constant, get the namespace abbreviation by
|
||||
providing the namespace link
|
||||
|
||||
Parameters
|
||||
-----------
|
||||
namespace : str
|
||||
valid key of the NAMESPACES variable
|
||||
"""
|
||||
if namespace not in NAMESPACES.keys():
|
||||
raise DwcNamespaceError("The namespace url is currently not supported in NAMESPACES")
|
||||
return NAMESPACES[namespace]
|
||||
|
||||
def get_term_definition(self, term_iri):
|
||||
"""Extract the required information from both tables to show on the webpage of a single term
|
||||
by using the term_iri as the identifier
|
||||
"""Extract the required information from the terms table to show on
|
||||
the webpage of a single term by using the term_iri as the identifier
|
||||
|
||||
Notes
|
||||
------
|
||||
Due to the current implementation, make sure to provide the same keys represented in the record-level specific version `process_terms` method (room for improvement)
|
||||
Due to the current implementation, make sure to provide the same keys
|
||||
represented in the record-level specific version `process_terms`
|
||||
method (room for improvement)
|
||||
"""
|
||||
cf_term = self._select_config_term(term_iri)
|
||||
vs_term = self._select_versions_term(term_iri)
|
||||
|
||||
term_data = {}
|
||||
term_data["label"] = vs_term['label']
|
||||
term_data["iri"] = term_iri
|
||||
term_data["class"] = cf_term['organized_in']
|
||||
term_data["class"] = vs_term['organized_in']
|
||||
term_data["definition"] = self.convert_link(vs_term['definition'])
|
||||
term_data["comments"] = self.convert_link(self.convert_code(cf_term['comments']))
|
||||
term_data["comments"] = self.convert_link(self.convert_code(vs_term['comments']))
|
||||
term_data["examples"] = self.convert_link(self.convert_code(vs_term['examples']))
|
||||
term_data["rdf_type"] = vs_term['rdf_type']
|
||||
namespace_url, _ = self.split_iri(term_iri)
|
||||
term_data["namespace"] = self.resolve_namespace_abbrev(namespace_url)
|
||||
|
@ -173,13 +154,15 @@ class DwcDigester(object):
|
|||
|
||||
@staticmethod
|
||||
def convert_code(text_with_backticks):
|
||||
"""takes all back-quoted sections in a text field and converts it to the html tagged version of code blocks <code>...</code>
|
||||
"""Takes all back-quoted sections in a text field and converts it to
|
||||
the html tagged version of code blocks <code>...</code>
|
||||
"""
|
||||
return re.sub(r'`([^`]*)`', r'<code>\1</code>', text_with_backticks)
|
||||
|
||||
@staticmethod
|
||||
def convert_link(text_with_urls):
|
||||
"""takes all links in a text field and converts it to the html tagged version of the link
|
||||
"""Takes all links in a text field and converts it to the html tagged
|
||||
version of the link
|
||||
"""
|
||||
def _handle_matched(inputstring):
|
||||
"""quick hack version of url handling on the current prime versions data"""
|
||||
|
@ -190,11 +173,17 @@ class DwcDigester(object):
|
|||
return re.sub(regx, _handle_matched, text_with_urls)
|
||||
|
||||
def process_terms(self):
|
||||
"""parse the config terms (sequence matters!), collect all required data from both the normative versions file and the config file and return the template ready data.
|
||||
"""Parse the config terms (sequence matters!)
|
||||
|
||||
Collect all required data from both the normative versions file and
|
||||
the config file and return the template ready data.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Data object that can be digested by the html-template file. Contains the term data formatted to create the indidivual outputs, each list element is a dictionary representing a class group. Hence, the data object is structured as follows:
|
||||
Data object that can be digested by the html-template file. Contains
|
||||
the term data formatted to create the individual outputs, each list
|
||||
element is a dictionary representing a class group. Hence, the data
|
||||
object is structured as follows:
|
||||
|
||||
[
|
||||
{'name' : class_group_name_1, 'label': xxxx,...,
|
||||
|
@ -222,7 +211,7 @@ class DwcDigester(object):
|
|||
class_group["terms"] = []
|
||||
class_group["namespace"] = None
|
||||
|
||||
for term in self.configs(): # sequence of the config file used as order
|
||||
for term in self.versions(): # sequence of the terms file used as order
|
||||
term_data = self.get_term_definition(term['term_iri'])
|
||||
# new class encountered
|
||||
if term_data["rdf_type"] == "http://www.w3.org/2000/01/rdf-schema#Class":
|
||||
|
@ -240,7 +229,8 @@ class DwcDigester(object):
|
|||
|
||||
def create_html(self, html_template="../docs/_layouts/terms.tmpl",
|
||||
html_output="../docs/terms/index.md"):
|
||||
"""build html with the processed term info, by filling in the tmpl-template
|
||||
"""build html with the processed term info, by filling in the
|
||||
tmpl-template
|
||||
|
||||
Parameters
|
||||
-----------
|
||||
|
@ -254,7 +244,10 @@ class DwcDigester(object):
|
|||
data = {}
|
||||
data["class_groups"] = self.template_data
|
||||
|
||||
env = Environment(loader = FileSystemLoader(os.path.dirname(html_template)))
|
||||
env = Environment(
|
||||
loader = FileSystemLoader(os.path.dirname(html_template)),
|
||||
trim_blocks = True
|
||||
)
|
||||
template = env.get_template(os.path.basename(html_template))
|
||||
html = template.render(data)
|
||||
|
||||
|
@ -263,9 +256,11 @@ class DwcDigester(object):
|
|||
index_page.close()
|
||||
|
||||
def simple_dwc_terms(self):
|
||||
"""only extract those terms that are simple dwc, defined as `simple` in the flags column of the config file of terms"""
|
||||
"""Only extract those terms that are simple dwc, defined as `simple`
|
||||
in the flags column of the config file of terms
|
||||
"""
|
||||
properties = []
|
||||
for term in self.configs():
|
||||
for term in self.versions():
|
||||
term_data = self.get_term_definition(term['term_iri'])
|
||||
if (term_data["rdf_type"] == "http://www.w3.org/1999/02/22-rdf-syntax-ns#Property" and
|
||||
term["flags"] == "simple"):
|
||||
|
@ -273,7 +268,7 @@ class DwcDigester(object):
|
|||
return properties
|
||||
|
||||
def create_dwc_list(self, file_output="../dist/simple_dwc_vertical.csv"):
|
||||
"""build a list of simple dwc terms and write it to file
|
||||
"""Build a list of simple dwc terms and write it to file
|
||||
|
||||
Parameters
|
||||
-----------
|
||||
|
@ -285,7 +280,7 @@ class DwcDigester(object):
|
|||
dwc_list_file.write(term + "\n")
|
||||
|
||||
def create_dwc_header(self, file_output="../dist/simple_dwc_horizontal.csv"):
|
||||
"""build a header of simple dwc terms and write it to file
|
||||
"""Build a header of simple dwc terms and write it to file
|
||||
|
||||
Parameters
|
||||
-----------
|
||||
|
@ -300,11 +295,10 @@ class DwcDigester(object):
|
|||
def main():
|
||||
"""Building up the quick reference html and derivatives"""
|
||||
|
||||
config_terms_file = "./config/terms.csv"
|
||||
term_versions_file = "../vocabulary/term_versions.csv"
|
||||
|
||||
print("Running build process:")
|
||||
my_dwc = DwcDigester(term_versions_file, config_terms_file)
|
||||
my_dwc = DwcDigester(term_versions_file)
|
||||
print("Building quick reference guide")
|
||||
my_dwc.create_html()
|
||||
print("Building simple DwC CSV files")
|
||||
|
|
|
@ -10,33 +10,38 @@ This page provides a list of all currently recommended terms of the Darwin Core
|
|||
{% for class_group in class_groups %}
|
||||
|
||||
## {{ class_group.label }}
|
||||
|
||||
|
||||
<div class="my-4">
|
||||
{% for term in class_group.terms %}
|
||||
<a class="btn btn-sm btn-outline-secondary m-1" href="#{{ term.namespace}}:{{ term.label }}">{{ term.label }}</a>
|
||||
<a class="btn btn-sm btn-outline-secondary m-1" href="#{{ term.namespace }}:{{ term.label }}">{{ term.label }}</a>
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
{% if class_group.iri is not none %}
|
||||
{# The above will exclude the Record-level class_group #}
|
||||
<table class="table table-sm border mb-3">
|
||||
{# Class (the above will exclude the Record-level class_group) #}
|
||||
<table class="table table-sm table-bordered">
|
||||
<tbody>
|
||||
<tr class="table-primary"><th colspan="2">{{ class_group.label }}<span class="badge badge-primary float-right">Class</span></th></tr>
|
||||
<tr><td class="label">Identifier</td><td><a href="{{ class_group.iri }}">{{ class_group.iri }}</a></td></tr>
|
||||
<tr><td class="label">Definition</td><td>{{class_group.definition}}</td></tr>
|
||||
<tr><td class="label">Comments</td><td>{{class_group.comments}}</td></tr>
|
||||
<tr><td class="label">Definition</td><td>{{ class_group.definition }}</td></tr>
|
||||
<tr><td class="label">Comments</td><td>{{ class_group.comments }}</td></tr>
|
||||
<tr><td class="label">Examples</td><td>{{ class_group.examples }}</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
{%endif %}
|
||||
|
||||
{% for term in class_group.terms %}
|
||||
<a id="{{ term.namespace}}:{{ term.label }}" class="anchor"></a>
|
||||
<table class="table table-sm border mb-3">
|
||||
{# Term #}
|
||||
<p class="invisible">
|
||||
<a id="{{ term.namespace }}:{{ term.label }}"></a>{% if term.namespace != "dwciri" %}<a id="{{ term.label }}"></a>{% endif %}
|
||||
</p>
|
||||
<table class="table table-sm table-bordered">
|
||||
<tbody>
|
||||
<tr class="table-secondary"><th colspan="2">{{ term.label }}<span class="badge badge-secondary float-right">Property</span></th></tr>
|
||||
<tr><td class="label">Identifier</td><td><a href="{{ term.iri }}">{{ term.iri }}</a></td></tr>
|
||||
<tr><td class="label">Definition</td><td>{{ term.definition }}</td></tr>
|
||||
<tr><td class="label">Comments</td><td>{{ term.comments }}</td></tr>
|
||||
<tr><td class="label">Examples</td><td>{{ term.examples }}</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
{% endfor %}
|
||||
|
|
3653
docs/terms/index.md
3653
docs/terms/index.md
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue