Merge pull request #206 from tdwg/update-build

Update build script
2018-10-15 13:39:19 +02:00 · 2018-10-15 13:39:19 +02:00 · 62af4272c7
parent 886b55fbdb e14dd43047
commit 62af4272c7
5 changed files with 2071 additions and 2890 deletions
--- a/build/README.md
+++ b/build/README.md
@ -1,16 +1,25 @@
-The build folder contains a python script to rebuild the terms index page.
+# Build script

-It will use the `vocabulary/term_versions.csv` and the `./config/terms_config.csv` to populate the html-template file (`./config/index.tmpl`)
+The build script `build.py` uses as input:
+
+* [vocabulary/term_versions.csv](../vocabulary/term_versions.csv): the list of terms
+* [docs/_layouts/terms.tmpl](../docs/_layouts/terms.tmpl): a Jinja2 template for the quick reference guide
+
+To create:
+
+* The quick reference guide as a Markdown file at [docs/terms/index.md](../docs/terms/index.md). The guide is built as Markdown (with a lot of included html) rather than html, so it can be incorporated by Jekyll in the Darwin Core website (including a header, footer and table of contents).
+* Two simple Darwin Core CSV files in [dist/](../dist/)

 ## Run the build script
-Install the required libraries for this script with pip once:

-```
-pip install -r requirements.txt
-```
+1. Install the required libraries (once):

-To run the script, run the build script from command line:
+    ```bash
+    pip install -r requirements.txt
+    ```

-```
-python build.py
-```
+2. Run the script from the command line:
+
+    ```bash
+    python build.py
+    ```
--- a/build/build.py
+++ b/build/build.py
@ -38,7 +38,7 @@ class DwcNamespaceError(Exception):
 class DwcBuildReader():

    def __init__(self, dwc_build_file):
-        """Custom Reader switching between to raw Github or local file"""
+        """Custom Reader switching between raw Github or local file"""
        self.dwc_build_file = dwc_build_file

    def __enter__(self):
@ -54,118 +54,99 @@ class DwcBuildReader():

 class DwcDigester(object):

-    def __init__(self, term_versions, terms_config):
-        """digest the normative document of Darwin Core and the configurations file to support automatic generation of derivatives
+    def __init__(self, term_versions):
+        """Digest the term document of Darwin Core to support automatic
+        generation of derivatives

        Parameters
        -----------
        term_versions : str
-            either a relative path and filename of the normative Dwc document or a URL link to the
-            raw Github version of the file
-        terms_config : str
-            either a relative path and filename of the configurations file or a URL link to the
-            raw Github version of the file
+            Either a relative path and filename of the normative Dwc document
+            or a URL link to the raw Github version of the file

        Notes
        -----
-        Remark that the sequence of the configurations file entries is essential for the automatic generation of the individual documents (mainly the index.html)
+        Note that the sequence of the term versions entries is
+        essential for the automatic generation of the individual documents
+        (mainly the quick reference guide, index.md)
        """
        self.term_versions = term_versions
-        self.terms_config = terms_config

        self.term_versions_data = {}
        self._store_versions()
-        self.terms_config_data = {}
-        self._store_configs()
-
-        # check for the ability to combine the data
-        self.match_error_report()

        # create the defined data-object for the different outputs
        self.template_data = self.process_terms()

    def versions(self):
-        """iterator providing the terms as represented in the normative term versions file"""
+        """Iterator providing the terms as represented in the normative term
+        versions file
+        """
        with DwcBuildReader(self.term_versions) as versions:
            for vterm in csv.DictReader(io.TextIOWrapper(versions), delimiter=','):
                if vterm["status"] == "recommended":
                    yield vterm

-    def configs(self):
-        """iterator providing the terms as represented in the terms config file
-        (taking into account the sequence)"""
-        with DwcBuildReader(self.terms_config) as configs:
-            for cfterm in csv.DictReader(io.TextIOWrapper(configs), delimiter=','):
-                yield cfterm
-
    def _store_versions(self):
-        """collect all the versions data in a dictionary as the term_versions_data attribute"""
+        """Collect all the versions data in a dictionary as the
+        term_versions_data attribute
+        """
        for term in self.versions():
            self.term_versions_data[term["term_iri"]] = term

-    def _store_configs(self):
-        """collect all the config data in a dictionary as the terms_config_data attribute"""
-        for term in self.configs():
-            self.terms_config_data[term["term_iri"]] = term
-
    @property
    def _version_terms(self):
-        """get an overview of the terms in the term_versions file"""
+        """Get an overview of the terms in the term_versions file
+        """
        return set(self.term_versions_data.keys())

-    @property
-    def _config_terms(self):
-        """get an overview of the terms in the terms config file"""
-        return set(self.terms_config_data.keys())
-
    def _select_versions_term(self, term_iri):
-        """select a specific term of the versions data, using term_iri match"""
+        """Select a specific term of the versions data, using term_iri match
+        """
        return self.term_versions_data[term_iri]

-    def _select_config_term(self, term_iri):
-        """select a specific term of the config data, using term_iri match"""
-        return self.terms_config_data[term_iri]
-
-    def match_error_report(self):
-        """check if the prime dwc file and the configurations file provide corresponding terms and inform user on the term differences in between both files"""
-        overload_versionterms = self._version_terms - self._config_terms
-        overload_configterms = self._config_terms - self._version_terms
-        if len(overload_versionterms) > 0 or len(overload_configterms) > 0:
-            vs_terms = ", ".join([term.split("/")[-1] for term in overload_versionterms])
-            cf_terms = ", ".join([term.split("/")[-1] for term in overload_configterms])
-            raise ProvidedTermsError("".join(["Terms only in term_versions.csv: ", vs_terms,
-                                              ". Terms only in terms_config.csv: ", cf_terms]))
    @staticmethod
    def split_iri(term_iri):
-        """split an iri field into the namespace url and the local name of the term"""
+        """Split an iri field into the namespace url and the local name
+        of the term
+        """
        prog = re.compile("(.*/)([^/]*$)")
        namespace, local_name = prog.findall(term_iri)[0]
        return namespace, local_name

    @staticmethod
    def resolve_namespace_abbrev(namespace):
-        """Using the NAMESPACE constant, get the namespace abbreviation by providing the namespace link"""
+        """Using the NAMESPACES constant, get the namespace abbreviation by
+        providing the namespace link
+
+        Parameters
+        -----------
+        namespace : str
+            valid key of the NAMESPACES variable
+        """
        if namespace not in NAMESPACES.keys():
            raise DwcNamespaceError("The namespace url is currently not supported in NAMESPACES")
        return NAMESPACES[namespace]

    def get_term_definition(self, term_iri):
-        """Extract the required information from both tables to show on the webpage of a single term
-        by using the term_iri as the identifier
+        """Extract the required information from the terms table to show on
+        the webpage of a single term by using the term_iri as the identifier

        Notes
        ------
-        Due to the current implementation, make sure to provide the same keys represented in the record-level specific version `process_terms` method (room for improvement)
+        Due to the current implementation, make sure to provide the same keys
+        represented in the record-level specific version `process_terms`
+        method (room for improvement)
        """
-        cf_term = self._select_config_term(term_iri)
        vs_term = self._select_versions_term(term_iri)

        term_data = {}
        term_data["label"] = vs_term['label']
        term_data["iri"] = term_iri
-        term_data["class"] = cf_term['organized_in']
+        term_data["class"] = vs_term['organized_in']
        term_data["definition"] = self.convert_link(vs_term['definition'])
-        term_data["comments"] = self.convert_link(self.convert_code(cf_term['comments']))
+        term_data["comments"] = self.convert_link(self.convert_code(vs_term['comments']))
+        term_data["examples"] = self.convert_link(self.convert_code(vs_term['examples']))
        term_data["rdf_type"] = vs_term['rdf_type']
        namespace_url, _ = self.split_iri(term_iri)
        term_data["namespace"] = self.resolve_namespace_abbrev(namespace_url)
@ -173,13 +154,15 @@ class DwcDigester(object):

    @staticmethod
    def convert_code(text_with_backticks):
-        """takes all back-quoted sections in a text field and converts it to the html tagged version of code blocks <code>...</code>
+        """Takes all back-quoted sections in a text field and converts it to
+        the html tagged version of code blocks <code>...</code>
        """
        return re.sub(r'`([^`]*)`', r'<code>\1</code>', text_with_backticks)

    @staticmethod
    def convert_link(text_with_urls):
-        """takes all links in a text field and converts it to the html tagged version of the link
+        """Takes all links in a text field and converts it to the html tagged
+        version of the link
        """
        def _handle_matched(inputstring):
            """quick hack version of url handling on the current prime versions data"""
@ -190,11 +173,17 @@ class DwcDigester(object):
        return re.sub(regx, _handle_matched, text_with_urls)

    def process_terms(self):
-        """parse the config terms (sequence matters!), collect all required data from both the normative versions file and the config file and return the template ready data.
+        """Parse the terms (sequence matters!)
+
+        Collect all required data from the normative versions file and
+        return the template ready data.

        Returns
        -------
-        Data object that can be digested by the html-template file. Contains the term data formatted to create the indidivual outputs, each list element is a dictionary representing a class group. Hence, the data object is structured as follows:
+        Data object that can be digested by the html-template file. Contains
+        the term data formatted to create the individual outputs, each list
+        element is a dictionary representing a class group. Hence, the data
+        object is structured as follows:

            [
                {'name' : class_group_name_1, 'label': xxxx,...,
@ -222,7 +211,7 @@ class DwcDigester(object):
        class_group["terms"] = []
        class_group["namespace"] = None

-        for term in self.configs(): # sequence of the config file used as order
+        for term in self.versions(): # sequence of the terms file used as order
            term_data = self.get_term_definition(term['term_iri'])
            # new class encountered
            if term_data["rdf_type"] == "http://www.w3.org/2000/01/rdf-schema#Class":
@ -240,7 +229,8 @@ class DwcDigester(object):

    def create_html(self, html_template="../docs/_layouts/terms.tmpl",
                    html_output="../docs/terms/index.md"):
-        """build html with the processed term info, by filling in the tmpl-template
+        """build html with the processed term info, by filling in the
+        tmpl-template

        Parameters
        -----------
@ -254,7 +244,10 @@ class DwcDigester(object):
        data = {}
        data["class_groups"] = self.template_data

-        env = Environment(loader = FileSystemLoader(os.path.dirname(html_template)))
+        env = Environment(
+            loader = FileSystemLoader(os.path.dirname(html_template)),
+            trim_blocks = True
+        )
        template = env.get_template(os.path.basename(html_template))
        html = template.render(data)

@ -263,9 +256,11 @@ class DwcDigester(object):
        index_page.close()

    def simple_dwc_terms(self):
-        """only extract those terms that are simple dwc, defined as `simple` in the flags column of the config file of terms"""
+        """Only extract those terms that are simple dwc, defined as `simple`
+        in the flags column of the term versions file
+        """
        properties = []
-        for term in self.configs():
+        for term in self.versions():
            term_data = self.get_term_definition(term['term_iri'])
            if (term_data["rdf_type"] == "http://www.w3.org/1999/02/22-rdf-syntax-ns#Property" and
                term["flags"] == "simple"):
@ -273,7 +268,7 @@ class DwcDigester(object):
        return properties

    def create_dwc_list(self, file_output="../dist/simple_dwc_vertical.csv"):
-        """build a list of simple dwc terms and write it to file
+        """Build a list of simple dwc terms and write it to file

        Parameters
        -----------
@ -285,7 +280,7 @@ class DwcDigester(object):
                dwc_list_file.write(term + "\n")

    def create_dwc_header(self, file_output="../dist/simple_dwc_horizontal.csv"):
-        """build a header of simple dwc terms and write it to file
+        """Build a header of simple dwc terms and write it to file

        Parameters
        -----------
@ -300,11 +295,10 @@ class DwcDigester(object):
 def main():
    """Building up the quick reference html and derivatives"""

-    config_terms_file = "./config/terms.csv"
    term_versions_file = "../vocabulary/term_versions.csv"

    print("Running build process:")
-    my_dwc = DwcDigester(term_versions_file, config_terms_file)
+    my_dwc = DwcDigester(term_versions_file)
    print("Building quick reference guide")
    my_dwc.create_html()
    print("Building simple DwC CSV files")
--- a/docs/_layouts/terms.tmpl
+++ b/docs/_layouts/terms.tmpl
@ -10,33 +10,38 @@ This page provides a list of all currently recommended terms of the Darwin Core
 {% for class_group in class_groups %}

 ## {{ class_group.label }}
-    
+
 <div class="my-4">
    {% for term in class_group.terms %}
-    <a class="btn btn-sm btn-outline-secondary m-1" href="#{{ term.namespace}}:{{ term.label }}">{{ term.label }}</a>
+    <a class="btn btn-sm btn-outline-secondary m-1" href="#{{ term.namespace }}:{{ term.label }}">{{ term.label }}</a>
    {% endfor %}
 </div>

 {% if class_group.iri is not none %}
-{# The above will exclude the Record-level class_group #}
-<table class="table table-sm border mb-3">
+{# Class (the above will exclude the Record-level class_group) #}
+<table class="table table-sm table-bordered">
    <tbody>
        <tr class="table-primary"><th colspan="2">{{ class_group.label }}<span class="badge badge-primary float-right">Class</span></th></tr>
        <tr><td class="label">Identifier</td><td><a href="{{ class_group.iri }}">{{ class_group.iri }}</a></td></tr>
-        <tr><td class="label">Definition</td><td>{{class_group.definition}}</td></tr>
-        <tr><td class="label">Comments</td><td>{{class_group.comments}}</td></tr>
+        <tr><td class="label">Definition</td><td>{{ class_group.definition }}</td></tr>
+        <tr><td class="label">Comments</td><td>{{ class_group.comments }}</td></tr>
+        <tr><td class="label">Examples</td><td>{{ class_group.examples }}</td></tr>
    </tbody>
 </table>
 {%endif %}

 {% for term in class_group.terms %}
-<a id="{{ term.namespace}}:{{ term.label }}" class="anchor"></a>
-<table class="table table-sm border mb-3">
+{# Term #}
+<p class="invisible">
+    <a id="{{ term.namespace }}:{{ term.label }}"></a>{% if term.namespace != "dwciri" %}<a id="{{ term.label }}"></a>{% endif %}
+</p>
+<table class="table table-sm table-bordered">
    <tbody>
        <tr class="table-secondary"><th colspan="2">{{ term.label }}<span class="badge badge-secondary float-right">Property</span></th></tr>
        <tr><td class="label">Identifier</td><td><a href="{{ term.iri }}">{{ term.iri }}</a></td></tr>
        <tr><td class="label">Definition</td><td>{{ term.definition }}</td></tr>
        <tr><td class="label">Comments</td><td>{{ term.comments }}</td></tr>
+        <tr><td class="label">Examples</td><td>{{ term.examples }}</td></tr>
    </tbody>
 </table>
 {% endfor %}
--- a/docs/terms/index.md
+++ b/docs/terms/index.md
--- a/vocabulary/term_versions.csv
+++ b/vocabulary/term_versions.csv