# dwc/build/build.py

#
# S. Van Hoey
#
# Build script for tdwg dwc handling
#

import io
import os
import re
import csv
import sys
import codecs

from urllib import request
from jinja2 import FileSystemLoader, Environment

# Supported namespace urls (the part of a term iri up to and including the
# last "/") mapped to the abbreviation stored with each term.
NAMESPACES = {
    'http://rs.tdwg.org/dwc/iri/': 'dwciri',
    'http://rs.tdwg.org/dwc/terms/': 'dwc',
    'http://purl.org/dc/elements/1.1/': 'dc',
    'http://purl.org/dc/terms/': 'dcterms',
    'http://rs.tdwg.org/dwc/terms/attributes/': 'tdwgutility',
}


class ProvidedTermsError(Exception):
    """Signals an inconsistency in the available terms"""


class RdfTypeError(Exception):
    """Signals an rdf type that is not known by the builder"""

class DwcNamespaceError(Exception):
    """Signals a namespace link that is missing from the provided links"""

class DwcBuildReader():
    """Context manager handing out a binary stream of a build file

    The stream is fetched with ``urllib`` when the given name contains
    ``https://raw.github``, in all other cases the name is opened as a
    local file in binary mode.
    """

    def __init__(self, dwc_build_file):
        """Keep the path or raw Github url of the file to read"""
        self.dwc_build_file = dwc_build_file

    def __enter__(self):
        """Open the source and return the resulting binary stream"""
        source = self.dwc_build_file
        is_raw_github = "https://raw.github" in source
        self.open_dwc_term = (request.urlopen(source) if is_raw_github
                              else open(source, 'rb'))
        return self.open_dwc_term

    def __exit__(self, *args):
        """Close the stream that was opened on entering the context"""
        self.open_dwc_term.close()


class DwcDigester(object):

    def __init__(self, term_versions):
        """Digest the term document of Darwin Core to support automatic
        generation of derivatives

        Parameters
        -----------
        term_versions : str
            Either a relative path and filename of the normative Dwc document
            or a URL link to the raw Github version of the file

        Notes
        -----
        Remark that the sequence of the term versions entries is
        essential for the automatic generation of the individual documents
        (mainly the index.html)
        """
        self.term_versions = term_versions

        self.term_versions_data = {}
        self._store_versions()

        # create the defined data-object for the different outputs
        self.template_data = self.process_terms()

    def versions(self):
        """Iterator providing the recommended terms as represented in the
        normative term versions file

        The file is always decoded as UTF-8, independent of the locale of
        the machine running the build, matching the UTF-8 encoding used
        when writing the derived files.

        Yields
        ------
        dict
            one row of the term versions file (column name -> value), in
            file order, limited to the rows with a ``status`` column equal
            to ``recommended``
        """
        with DwcBuildReader(self.term_versions) as versions:
            # the reader hands out a binary stream; decode it explicitly so
            # the result does not depend on the platform default encoding
            text_stream = io.TextIOWrapper(versions, encoding="utf-8")
            for vterm in csv.DictReader(text_stream, delimiter=','):
                if vterm["status"] == "recommended":
                    yield vterm

    def _store_versions(self):
        """Collect all the versions data in a dictionary as the
        term_versions_data attribute
        """
        for term in self.versions():
            self.term_versions_data[term["term_iri"]] = term

    @property
    def _version_terms(self):
        """Get an overview of the terms in the term_versions file
        """
        return set(self.term_versions_data.keys())

    def _select_versions_term(self, term_iri):
        """Select a specific term of the versions data, using term_iri match
        """
        return self.term_versions_data[term_iri]

    @staticmethod
    def split_iri(term_iri):
        """Split an iri field into the namespace url and the local name
        of the term
        """
        prog = re.compile("(.*/)([^/]*$)")
        namespace, local_name = prog.findall(term_iri)[0]
        return namespace, local_name

    @staticmethod
    def resolve_namespace_abbrev(namespace):
        """Translate a namespace link into its abbreviation, as defined by
        the NAMESPACES constant

        Parameters
        -----------
        namespace : str
            valid key of the NAMESPACES variable

        Raises
        ------
        DwcNamespaceError
            when the namespace is not a key of NAMESPACES
        """
        if namespace in NAMESPACES:
            return NAMESPACES[namespace]
        raise DwcNamespaceError("The namespace url is currently not supported in NAMESPACES")

    def get_term_definition(self, term_iri):
        """Extract the required information from the terms table to show on
        the webpage of a single term by using the term_iri as the identifier

        Notes
        ------
        Due to the current implementation, make sure to provide the same keys
        represented in the record-level specific version `process_terms`
        method (room for improvement)
        """
        vs_term = self._select_versions_term(term_iri)

        term_data = {}
        term_data["label"] = vs_term['term_localName'] # See https://github.com/tdwg/dwc/issues/253#issuecomment-670098202
        term_data["iri"] = term_iri
        term_data["class"] = vs_term['organized_in']
        term_data["definition"] = self.convert_link(vs_term['definition'])
        term_data["comments"] = self.convert_link(self.convert_code(vs_term['comments']))
        term_data["examples"] = self.convert_link(self.convert_code(vs_term['examples']))
        term_data["rdf_type"] = vs_term['rdf_type']
        namespace_url, _ = self.split_iri(term_iri)
        term_data["namespace"] = self.resolve_namespace_abbrev(namespace_url)
        return term_data

    @staticmethod
    def convert_code(text_with_backticks):
        """Takes all back-quoted sections in a text field and converts it to
        the html tagged version of code blocks <code>...</code>
        """
        return re.sub(r'`([^`]*)`', r'<code>\1</code>', text_with_backticks)

    @staticmethod
    def convert_link(text_with_urls):
        """Takes all links in a text field and converts it to the html tagged
        version of the link
        """
        def _handle_matched(inputstring):
            """quick hack version of url handling on the current prime versions data"""
            url = inputstring.group()
            return "<a href=\"{}\">{}</a>".format(url, url)

        regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.])"
        return re.sub(regx, _handle_matched, text_with_urls)

    def process_terms(self):
        """Parse the config terms (sequence matters!)

        Collect all required data from both the normative versions file and
        the config file and return the template ready data.

        Returns
        -------
        Data object that can be digested by the html-template file. Contains
        the term data formatted to create the indidivual outputs, each list
        element is a dictionary representing a class group. Hence, the data
        object is structured as follows:

            [
                {'name' : class_group_name_1, 'label': xxxx,...,
                    'terms':
                        [
                            {'name' : term_1, 'label': xxxx,...},
                            {'name' : term_2, 'label': xxxx,...},
                            ...
                        ]}
                {'name' : class_group_name_2,...
                ...},
                ...
            ]
        """
        template_data = []
        in_class = "Record-level"
        # sequence matters in config and it starts with Record-level which we populate here ad-hoc
        class_group = {}
        class_group["label"] = "Record-level"
        class_group["iri"] = None
        class_group["class"] = None
        class_group["definition"] = None
        class_group["comments"] = None
        class_group["rdf_type"] = None
        class_group["terms"] = []
        class_group["namespace"] = None

        for term in self.versions(): # sequence of the terms file used as order
            term_data = self.get_term_definition(term['term_iri'])
            # new class encountered
            if term_data["rdf_type"] == "http://www.w3.org/2000/01/rdf-schema#Class":
                # store previous section in template_data
                template_data.append(class_group)
                #start new class group
                class_group = term_data
                class_group["terms"] = []
                in_class = term_data["label"] # check on the class working in
            else:
                class_group['terms'].append(term_data)
        # save the last class to template_data
        template_data.append(class_group)
        return template_data

    def create_html(self, html_template="terms.tmpl",
                    html_output="../docs/terms/index.md"):
        """build the quick reference page with the processed term info, by
        filling in the tmpl-template

        The class groups stored in the template_data attribute are passed
        to the template as ``class_groups``. The rendered page is written
        as UTF-8.

        Parameters
        -----------
        html_template : str
            relative path and filename to the Jinja2 compatible
            template
        html_output : str
            relative path and filename to write the resulting page to
        """
        data = {"class_groups": self.template_data}

        # the template is looked up by its file name in its own directory
        env = Environment(
            loader=FileSystemLoader(os.path.dirname(html_template)),
            trim_blocks=True
        )
        template = env.get_template(os.path.basename(html_template))
        html = template.render(data)

        # context manager closes the file on errors as well; the explicit
        # encoding keeps the output independent of the platform default
        with open(html_output, "w", encoding="utf-8") as index_page:
            index_page.write(str(html))

    def simple_dwc_terms(self):
        """Only extract those terms that are simple dwc, defined as `simple`
        in the flags column of the config file of terms
        """
        properties = []
        for term in self.versions():
            term_data = self.get_term_definition(term['term_iri'])
            if (term_data["rdf_type"] == "http://www.w3.org/1999/02/22-rdf-syntax-ns#Property" and
                term["flags"] == "simple"):
                properties.append(term_data["label"])
        return properties

    def create_dwc_list(self, file_output="../dist/simple_dwc_vertical.csv"):
        """Build a list of simple dwc terms and write it to file

        Parameters
        -----------
        file_output : str
            relative path and filename to write the resulting list
        """
        with codecs.open(file_output, 'w', 'utf-8') as dwc_list_file:
            for term in self.simple_dwc_terms():
                dwc_list_file.write(term + "\n")

    def create_dwc_header(self, file_output="../dist/simple_dwc_horizontal.csv"):
        """Build a header of simple dwc terms and write it to file

        Parameters
        -----------
        file_output : str
            relative path and filename to write the resulting list
        """
        with codecs.open(file_output, 'w', 'utf-8') as dwc_header_file:
            properties = self.simple_dwc_terms()
            dwc_header_file.write(",".join(properties))
            dwc_header_file.write("\n")

def main():
    """Run the full build: the quick reference guide and the simple DwC
    CSV derivatives, all based on the term versions file
    """
    print("Running build process:")
    digester = DwcDigester("../vocabulary/term_versions.csv")

    print("Building quick reference guide")
    digester.create_html()

    print("Building simple DwC CSV files")
    digester.create_dwc_list()
    digester.create_dwc_header()

    print("Done!")


# Only run the build when this file is executed as a script. main() has no
# return value, so sys.exit receives None and the exit status is zero.
if __name__ == "__main__":
    sys.exit(main())
Add classes for config and version handling 2017-09-30 04:44:59 +00:00			`#`
			`# S. Van Hoey`
			`#`
			`# Build script for tdwg dwc handling`
			`#`

			`import io`
Convert to jinja2 API 2017-12-06 19:20:31 +00:00			`import os`
Provide support for namespace abbreviations in data-model 2017-10-01 14:49:12 +00:00			`import re`
Add classes for config and version handling 2017-09-30 04:44:59 +00:00			`import csv`
Convert to cmd utility 2017-09-30 15:13:14 +00:00			`import sys`
Add simple dwc derivatives 2017-10-01 01:55:07 +00:00			`import codecs`
Add classes for config and version handling 2017-09-30 04:44:59 +00:00
			`from urllib import request`
Convert to jinja2 API 2017-12-06 19:20:31 +00:00			`from jinja2 import FileSystemLoader, Environment`
Add template generation functions 2017-09-30 14:49:25 +00:00
Provide support for namespace abbreviations in data-model 2017-10-01 14:49:12 +00:00			`NAMESPACES = {`
			`'http://rs.tdwg.org/dwc/iri/' : 'dwciri',`
			`'http://rs.tdwg.org/dwc/terms/' : 'dwc',`
			`'http://purl.org/dc/elements/1.1/' : 'dc',`
			`'http://purl.org/dc/terms/' : 'dcterms',`
			`'http://rs.tdwg.org/dwc/terms/attributes/' : 'tdwgutility'}`

Add classes for config and version handling 2017-09-30 04:44:59 +00:00
			`class ProvidedTermsError(Exception):`
Improve docs and syntax 2017-10-01 14:08:14 +00:00			`"""inconsistency in the available terms Error"""`
Add classes for config and version handling 2017-09-30 04:44:59 +00:00			`pass`

Improve docs and syntax 2017-10-01 14:08:14 +00:00
version and config class handlers 2017-09-30 14:48:27 +00:00			`class RdfTypeError(Exception):`
Improve docs and syntax 2017-10-01 14:08:14 +00:00			`"""rdftype encountered that is not known by builder"""`
version and config class handlers 2017-09-30 14:48:27 +00:00			`pass`

Provide support for namespace abbreviations in data-model 2017-10-01 14:49:12 +00:00			`class DwcNamespaceError(Exception):`
			`"""Namespace link is not available in the currently provided links"""`
			`pass`
Add classes for config and version handling 2017-09-30 04:44:59 +00:00
			`class DwcBuildReader():`
version and config class handlers 2017-09-30 14:48:27 +00:00
Add classes for config and version handling 2017-09-30 04:44:59 +00:00			`def __init__(self, dwc_build_file):`
Update term layout - Drop mb-3: is default on .table - Use table-bordered (will border all cells rather than outline) - Combine anchors in an invisible paragraph (is rendered as paragraph from markdown anyway) - Only add regular anchor for none dwciri 2018-10-15 11:23:56 +00:00			`"""Custom Reader switching between raw Github or local file"""`
Add classes for config and version handling 2017-09-30 04:44:59 +00:00			`self.dwc_build_file = dwc_build_file`
version and config class handlers 2017-09-30 14:48:27 +00:00
Add classes for config and version handling 2017-09-30 04:44:59 +00:00			`def __enter__(self):`
			`if "https://raw.github" in self.dwc_build_file:`
			`self.open_dwc_term = request.urlopen(self.dwc_build_file)`
			`else:`
			`self.open_dwc_term = open(self.dwc_build_file, 'rb')`
			`return self.open_dwc_term`

			`def __exit__(self, *args):`
			`self.open_dwc_term.close()`


			`class DwcDigester(object):`
version and config class handlers 2017-09-30 14:48:27 +00:00
Fix #197 2018-10-14 10:09:40 +00:00			`def __init__(self, term_versions):`
Clean documentation 2018-10-14 10:14:05 +00:00			`"""Digest the term document of Darwin Core to support automatic`
Fix #197 2018-10-14 10:09:40 +00:00			`generation of derivatives`
Improve docs and syntax 2017-10-01 14:08:14 +00:00
			`Parameters`
			`-----------`
			`term_versions : str`
Clean documentation 2018-10-14 10:14:05 +00:00			`Either a relative path and filename of the normative Dwc document`
			`or a URL link to the raw Github version of the file`
Improve docs and syntax 2017-10-01 14:08:14 +00:00
			`Notes`
			`-----`
Fix #197 2018-10-14 10:09:40 +00:00			`Remark that the sequence of the term versions entries is`
			`essential for the automatic generation of the individual documents`
			`(mainly the index.html)`
Improve docs and syntax 2017-10-01 14:08:14 +00:00			`"""`
version and config class handlers 2017-09-30 14:48:27 +00:00			`self.term_versions = term_versions`

			`self.term_versions_data = {}`
			`self._store_versions()`

Provide the template data as class attribute 2017-10-01 15:26:07 +00:00			`# create the defined data-object for the different outputs`
			`self.template_data = self.process_terms()`

Add classes for config and version handling 2017-09-30 04:44:59 +00:00			`def versions(self):`
Clean documentation 2018-10-14 10:14:05 +00:00			`"""Iterator providing the terms as represented in the normative term`
			`versions file`
			`"""`
Add classes for config and version handling 2017-09-30 04:44:59 +00:00			`with DwcBuildReader(self.term_versions) as versions:`
			`for vterm in csv.DictReader(io.TextIOWrapper(versions), delimiter=','):`
			`if vterm["status"] == "recommended":`
			`yield vterm`

version and config class handlers 2017-09-30 14:48:27 +00:00			`def _store_versions(self):`
Clean documentation 2018-10-14 10:14:05 +00:00			`"""Collect all the versions data in a dictionary as the`
			`term_versions_data attribute`
			`"""`
version and config class handlers 2017-09-30 14:48:27 +00:00			`for term in self.versions():`
			`self.term_versions_data[term["term_iri"]] = term`

Represent term overviews as property 2017-10-01 14:58:00 +00:00			`@property`
Add classes for config and version handling 2017-09-30 04:44:59 +00:00			`def _version_terms(self):`
Clean documentation 2018-10-14 10:14:05 +00:00			`"""Get an overview of the terms in the term_versions file`
			`"""`
version and config class handlers 2017-09-30 14:48:27 +00:00			`return set(self.term_versions_data.keys())`

			`def _select_versions_term(self, term_iri):`
Clean documentation 2018-10-14 10:14:05 +00:00			`"""Select a specific term of the versions data, using term_iri match`
			`"""`
version and config class handlers 2017-09-30 14:48:27 +00:00			`return self.term_versions_data[term_iri]`

Provide support for namespace abbreviations in data-model 2017-10-01 14:49:12 +00:00			`@staticmethod`
			`def split_iri(term_iri):`
Clean documentation 2018-10-14 10:14:05 +00:00			`"""Split an iri field into the namespace url and the local name`
			`of the term`
			`"""`
Provide support for namespace abbreviations in data-model 2017-10-01 14:49:12 +00:00			`prog = re.compile("(./)([^/]$)")`
Use labels instead of local names in build script and template 2017-10-12 16:26:07 +00:00			`namespace, local_name = prog.findall(term_iri)[0]`
			`return namespace, local_name`
Add template generation functions 2017-09-30 14:49:25 +00:00
Provide support for namespace abbreviations in data-model 2017-10-01 14:49:12 +00:00			`@staticmethod`
			`def resolve_namespace_abbrev(namespace):`
Clean documentation 2018-10-14 10:14:05 +00:00			`"""Using the NAMESPACE constant, get the namespace abbreviation by`
			`providing the namespace link`

			`Parameters`
			`-----------`
			`namespace : str`
			`valid key of the NAMESPACES variable`
			`"""`
Provide support for namespace abbreviations in data-model 2017-10-01 14:49:12 +00:00			`if namespace not in NAMESPACES.keys():`
			`raise DwcNamespaceError("The namespace url is currently not supported in NAMESPACES")`
			`return NAMESPACES[namespace]`

Update documentation 2017-10-01 15:32:49 +00:00			`def get_term_definition(self, term_iri):`
Fix #197 2018-10-14 10:09:40 +00:00			`"""Extract the required information from the terms table to show on`
			`the webpage of a single term by using the term_iri as the identifier`
Update documentation 2017-10-01 15:32:49 +00:00
			`Notes`
			`------`
Fix #197 2018-10-14 10:09:40 +00:00			`Due to the current implementation, make sure to provide the same keys`
			represented in the record-level specific version `process_terms`
			`method (room for improvement)`
Provide support for namespace abbreviations in data-model 2017-10-01 14:49:12 +00:00			`"""`
Make term definition work on iri instead of dict 2017-10-01 15:27:49 +00:00			`vs_term = self._select_versions_term(term_iri)`
Add template generation functions 2017-09-30 14:49:25 +00:00
			`term_data = {}`
Reference #253 in build script 2020-08-07 19:46:51 +00:00			`term_data["label"] = vs_term['term_localName'] # See https://github.com/tdwg/dwc/issues/253#issuecomment-670098202`
Use labels instead of local names in build script and template 2017-10-12 16:26:07 +00:00			`term_data["iri"] = term_iri`
Fix #197 2018-10-14 10:09:40 +00:00			`term_data["class"] = vs_term['organized_in']`
Add functionality that converts link to html tagged link 2017-10-02 20:31:33 +00:00			`term_data["definition"] = self.convert_link(vs_term['definition'])`
Fix #197 2018-10-14 10:09:40 +00:00			`term_data["comments"] = self.convert_link(self.convert_code(vs_term['comments']))`
Add examples 2018-10-15 11:24:32 +00:00			`term_data["examples"] = self.convert_link(self.convert_code(vs_term['examples']))`
Add template generation functions 2017-09-30 14:49:25 +00:00			`term_data["rdf_type"] = vs_term['rdf_type']`
Provide support for namespace abbreviations in data-model 2017-10-01 14:49:12 +00:00			`namespace_url, _ = self.split_iri(term_iri)`
Add support for namespace abbreviations 2017-10-01 14:58:17 +00:00			`term_data["namespace"] = self.resolve_namespace_abbrev(namespace_url)`
Add template generation functions 2017-09-30 14:49:25 +00:00			`return term_data`

Add convert functionality for backticks to html tags 2017-10-03 15:37:59 +00:00			`@staticmethod`
			`def convert_code(text_with_backticks):`
Clean documentation 2018-10-14 10:14:05 +00:00			`"""Takes all back-quoted sections in a text field and converts it to`
			`the html tagged version of code blocks <code>...</code>`
Add convert functionality for backticks to html tags 2017-10-03 15:37:59 +00:00			`"""`
			return re.sub(r'`([^`]*)`', r'<code>\1</code>', text_with_backticks)

Add functionality that converts link to html tagged link 2017-10-02 20:31:33 +00:00			`@staticmethod`
			`def convert_link(text_with_urls):`
Clean documentation 2018-10-14 10:14:05 +00:00			`"""Takes all links in a text field and converts it to the html tagged`
			`version of the link`
Add functionality that converts link to html tagged link 2017-10-02 20:31:33 +00:00			`"""`
			`def _handle_matched(inputstring):`
			`"""quick hack version of url handling on the current prime versions data"""`
			`url = inputstring.group()`
			`return "<a href=\"{}\">{}</a>".format(url, url)`
Add convert functionality for backticks to html tags 2017-10-03 15:37:59 +00:00
Fix bug on handling < code 2017-10-03 16:53:20 +00:00			`regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.])"`
Add functionality that converts link to html tagged link 2017-10-02 20:31:33 +00:00			`return re.sub(regx, _handle_matched, text_with_urls)`

Add classes for config and version handling 2017-09-30 04:44:59 +00:00			`def process_terms(self):`
Fix #197 2018-10-14 10:09:40 +00:00			`"""Parse the config terms (sequence matters!)`

			`Collect all required data from both the normative versions file and`
			`the config file and return the template ready data.`
Update documentation 2017-10-01 15:32:49 +00:00
			`Returns`
			`-------`
Fix #197 2018-10-14 10:09:40 +00:00			`Data object that can be digested by the html-template file. Contains`
			`the term data formatted to create the indidivual outputs, each list`
			`element is a dictionary representing a class group. Hence, the data`
			`object is structured as follows:`
Add template generation functions 2017-09-30 14:49:25 +00:00
Update documentation 2017-10-01 15:32:49 +00:00			`[`
			`{'name' : class_group_name_1, 'label': xxxx,...,`
			`'terms':`
			`[`
			`{'name' : term_1, 'label': xxxx,...},`
			`{'name' : term_2, 'label': xxxx,...},`
			`...`
			`]}`
			`{'name' : class_group_name_2,...`
			`...},`
			`...`
			`]`
			`"""`
Add template generation functions 2017-09-30 14:49:25 +00:00			`template_data = []`
			`in_class = "Record-level"`
Update documentation 2017-10-01 15:32:49 +00:00			`# sequence matters in config and it starts with Record-level which we populate here ad-hoc`
Add template generation functions 2017-09-30 14:49:25 +00:00			`class_group = {}`
Update template to Bootstrap4 - Use dev.tdwg.org stylesheet and JS - Navbar only contains lik to TDWG - Sidebar is shown as fixed on bigger screens - Offset used for anchors - Terms are a definition list - List classes and terms (except Record-level) - Footer removed 2017-09-30 20:34:56 +00:00			`class_group["label"] = "Record-level"`
Use labels instead of local names in build script and template 2017-10-12 16:26:07 +00:00			`class_group["iri"] = None`
Update template to Bootstrap4 - Use dev.tdwg.org stylesheet and JS - Navbar only contains lik to TDWG - Sidebar is shown as fixed on bigger screens - Offset used for anchors - Terms are a definition list - List classes and terms (except Record-level) - Footer removed 2017-09-30 20:34:56 +00:00			`class_group["class"] = None`
			`class_group["definition"] = None`
			`class_group["comments"] = None`
			`class_group["rdf_type"] = None`
Add template generation functions 2017-09-30 14:49:25 +00:00			`class_group["terms"] = []`
Set namespace of Record-level to None There is no need for one 2017-10-06 23:08:42 +00:00			`class_group["namespace"] = None`
Add support for namespace abbreviations 2017-10-01 14:58:17 +00:00
Fix #197 2018-10-14 10:09:40 +00:00			`for term in self.versions(): # sequence of the terms file used as order`
Make term definition work on iri instead of dict 2017-10-01 15:27:49 +00:00			`term_data = self.get_term_definition(term['term_iri'])`
Add template generation functions 2017-09-30 14:49:25 +00:00			`# new class encountered`
			`if term_data["rdf_type"] == "http://www.w3.org/2000/01/rdf-schema#Class":`
			`# store previous section in template_data`
			`template_data.append(class_group)`
			`#start new class group`
			`class_group = term_data`
			`class_group["terms"] = []`
			`in_class = term_data["label"] # check on the class working in`
			`else:`
			`class_group['terms'].append(term_data)`
			`# save the last class to template_data`
			`template_data.append(class_group)`
			`return template_data`

Move terms.tmpl to build 2018-10-29 18:50:13 +00:00			`def create_html(self, html_template="terms.tmpl",`
Build quick ref guide as markdown Building the quick reference guide as a html file doesn't allow it to use the Jekyll templates (header, navbar, footer). We also can't reference those templates in the terms.tmpl as jinja2 and jekyll work slightly different. But by building the quick reference to a markdown document (with just the content, no html header etc.) it can be picked up by Jekyll like any other markdown file. The markdown file does contain mostly html, so we can style it exactly like we want. 2018-08-26 23:50:38 +00:00			`html_output="../docs/terms/index.md"):`
Clean documentation 2018-10-14 10:14:05 +00:00			`"""build html with the processed term info, by filling in the`
			`tmpl-template`
Improve docs and syntax 2017-10-01 14:08:14 +00:00
			`Parameters`
			`-----------`
			`html_template : str`
Convert to jinja2 API 2017-12-06 19:20:31 +00:00			`relative path and filename to the Jinja2 compatible`
Improve docs and syntax 2017-10-01 14:08:14 +00:00			`template`
			`html_output : str`
			`relative path and filename to write the resulting index.html`
			`"""`
Convert to cmd utility 2017-09-30 15:13:14 +00:00
Provide the template data as class attribute 2017-10-01 15:26:07 +00:00			`data = {}`
Use class_group instead of group in template We generally refer to these groups as classes. class is a reserved name though, so have to use class_group instead, which is also the variable name used in the build script 2017-10-12 16:28:17 +00:00			`data["class_groups"] = self.template_data`
Convert to jinja2 API 2017-12-06 19:20:31 +00:00
Trim blocks 2018-10-15 11:24:09 +00:00			`env = Environment(`
			`loader = FileSystemLoader(os.path.dirname(html_template)),`
			`trim_blocks = True`
			`)`
Convert to jinja2 API 2017-12-06 19:20:31 +00:00			`template = env.get_template(os.path.basename(html_template))`
			`html = template.render(data)`
Add template generation functions 2017-09-30 14:49:25 +00:00
			`index_page = open(html_output, "w")`
			`index_page.write(str(html))`
			`index_page.close()`
Convert to cmd utility 2017-09-30 15:13:14 +00:00
Provide the template data as class attribute 2017-10-01 15:26:07 +00:00			`def simple_dwc_terms(self):`
Clean documentation 2018-10-14 10:14:05 +00:00			"""Only extract those terms that are simple dwc, defined as `simple`
			`in the flags column of the config file of terms`
			`"""`
Add simple dwc derivatives 2017-10-01 01:55:07 +00:00			`properties = []`
Fix #197 2018-10-14 10:09:40 +00:00			`for term in self.versions():`
Provide the template data as class attribute 2017-10-01 15:26:07 +00:00			`term_data = self.get_term_definition(term['term_iri'])`
Improve docs and syntax 2017-10-01 14:08:14 +00:00			`if (term_data["rdf_type"] == "http://www.w3.org/1999/02/22-rdf-syntax-ns#Property" and`
			`term["flags"] == "simple"):`
Use labels instead of local names in build script and template 2017-10-12 16:26:07 +00:00			`properties.append(term_data["label"])`
Add simple dwc derivatives 2017-10-01 01:55:07 +00:00			`return properties`

Provide the template data as class attribute 2017-10-01 15:26:07 +00:00			`def create_dwc_list(self, file_output="../dist/simple_dwc_vertical.csv"):`
Clean documentation 2018-10-14 10:14:05 +00:00			`"""Build a list of simple dwc terms and write it to file`
Update documentation 2017-10-01 15:32:49 +00:00
			`Parameters`
			`-----------`
			`file_output : str`
			`relative path and filename to write the resulting list`
			`"""`
Add simple dwc derivatives 2017-10-01 01:55:07 +00:00			`with codecs.open(file_output, 'w', 'utf-8') as dwc_list_file:`
Provide the template data as class attribute 2017-10-01 15:26:07 +00:00			`for term in self.simple_dwc_terms():`
Bugfixes on simple dwc flag 2017-10-01 13:23:58 +00:00			`dwc_list_file.write(term + "\n")`
Add simple dwc derivatives 2017-10-01 01:55:07 +00:00
Provide the template data as class attribute 2017-10-01 15:26:07 +00:00			`def create_dwc_header(self, file_output="../dist/simple_dwc_horizontal.csv"):`
Clean documentation 2018-10-14 10:14:05 +00:00			`"""Build a header of simple dwc terms and write it to file`
Update documentation 2017-10-01 15:32:49 +00:00
			`Parameters`
			`-----------`
			`file_output : str`
			`relative path and filename to write the resulting list`
			`"""`
Add simple dwc derivatives 2017-10-01 01:55:07 +00:00			`with codecs.open(file_output, 'w', 'utf-8') as dwc_header_file:`
Provide the template data as class attribute 2017-10-01 15:26:07 +00:00			`properties = self.simple_dwc_terms()`
Add simple dwc derivatives 2017-10-01 01:55:07 +00:00			`dwc_header_file.write(",".join(properties))`
			`dwc_header_file.write("\n")`
Convert to cmd utility 2017-09-30 15:13:14 +00:00
			`def main():`
Improve docs and syntax 2017-10-01 14:08:14 +00:00			`"""Building up the quick reference html and derivatives"""`
Convert to cmd utility 2017-09-30 15:13:14 +00:00
Revert to using term_versions.csv for QRG build script 2020-08-13 08:08:11 +00:00			`term_versions_file = "../vocabulary/term_versions.csv"`
Convert to cmd utility 2017-09-30 15:13:14 +00:00
Rephrase print messages of build script 2017-10-06 23:08:21 +00:00			`print("Running build process:")`
Fix #197 2018-10-14 10:09:40 +00:00			`my_dwc = DwcDigester(term_versions_file)`
Rephrase print messages of build script 2017-10-06 23:08:21 +00:00			`print("Building quick reference guide")`
Provide the template data as class attribute 2017-10-01 15:26:07 +00:00			`my_dwc.create_html()`
Rephrase print messages of build script 2017-10-06 23:08:21 +00:00			`print("Building simple DwC CSV files")`
Provide the template data as class attribute 2017-10-01 15:26:07 +00:00			`my_dwc.create_dwc_list()`
			`my_dwc.create_dwc_header()`
Rephrase print messages of build script 2017-10-06 23:08:21 +00:00			`print("Done!")`
Convert to cmd utility 2017-09-30 15:13:14 +00:00

			`if __name__ == "__main__":`
Update default build location terms index 2017-09-30 16:14:12 +00:00			`sys.exit(main())`