@ -0,0 +1,423 @@
"cells": [
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Script to build Markdown pages that provide term metadata for simple vocabularies\n",
"# Steve Baskauf 2020-06-28 CC0\n",
"# This script merges static Markdown header and footer documents with term information tables (in Markdown) generated from data in the repo from the TDWG Github site\n",
"# Note: the requests, csv, and json imports below are needed by a helper function that this script calls\n",
"import re\n",
"import requests # best library to manage HTTP transactions\n",
"import csv # library to read/write/parse CSV files\n",
"import json # library to convert JSON to Python data structures\n",
"import pandas as pd\n",
"# -----------------\n",
"# Configuration section\n",
"# -----------------\n",
"# !!!! Note !!!!\n",
"# This is an example of a simple vocabulary without categories. For a complex example\n",
"# with multiple namespaces and several categories, see build-page-categories.ipynb\n",
"# This is the base URL for raw files from the branch of the repo that has been pushed to GitHub. In this example,\n",
"# the branch is named \"pathway\"\n",
"githubBaseUri = ''\n",
"headerFileName = ''\n",
"footerFileName = ''\n",
"outFileName = '../../docs/doe/'\n",
"# This is a Python list of the database names of the term lists to be included in the document.\n",
"termLists = ['degreeOfEstablishment']\n",
"# NOTE! There may be problems unless every term list is of the same vocabulary type since the number of columns will differ\n",
"# However, there probably aren't any circumstances where mixed types will be used to generate the same page.\n",
"vocab_type = 2 # 1 is simple vocabulary, 2 is simple controlled vocabulary, 3 is c.v. with broader hierarchy\n",
"# Terms in large vocabularies like Darwin and Audubon Cores may be organized into categories using tdwgutility_organizedInClass\n",
"# If so, those categories can be used to group terms in the generated term list document.\n",
"organized_in_categories = False\n",
"# If organized in categories, the display_order list must contain the IRIs that are values of tdwgutility_organizedInClass\n",
"# If not organized into categories, the value is irrelevant. There just needs to be one item in the list.\n",
"display_order = ['']\n",
"display_label = ['Vocabulary'] # these are the section labels for the categories in the page\n",
"display_comments = [''] # these are the comments about the category to be appended following the section labels\n",
"display_id = ['Vocabulary'] # these are the fragment identifiers for the associated sections for the categories\n",
"# ---------------\n",
"# Function definitions\n",
"# ---------------\n",
"# replace URL with link\n",
"def createLinks(text):\n",
"    \"\"\"Wrap every http(s) URL found in text in an HTML anchor element.\n",
"\n",
"    A period that ends a matched URL is treated as sentence punctuation: it is\n",
"    left out of the link target and placed after the closing tag.\n",
"    Returns the text with the URLs replaced; text without URLs is returned unchanged.\n",
"    \"\"\"\n",
"    def repl(match):\n",
"        url = match.group(1)\n",
"        if url[-1] == '.':\n",
"            return '<a href=\"' + url[:-1] + '\">' + url[:-1] + '</a>.'\n",
"        return '<a href=\"' + url + '\">' + url + '</a>'\n",
"\n",
"    # a URL runs until whitespace, a comma, a semicolon, a closing parenthesis, or a double quote\n",
"    pattern = r'(https?://[^\\s,;\\)\"]*)'\n",
"    return re.sub(pattern, repl, text)"
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Collect the namespace prefix, namespace URI, and list IRI for each configured term list\n",
"term_lists_info = []\n",
"frame = pd.read_csv(githubBaseUri + 'term-lists/term-lists.csv', na_filter=False)\n",
"for termList in termLists:\n",
"    # start from the database name; the remaining keys are filled in from the matching row(s)\n",
"    term_list_dict = {'database': termList}\n",
"    for index,row in frame.iterrows():\n",
"        if row['database'] == termList:\n",
"            term_list_dict['pref_ns_prefix'] = row['vann_preferredNamespacePrefix']\n",
"            term_list_dict['pref_ns_uri'] = row['vann_preferredNamespaceUri']\n",
"            term_list_dict['list_iri'] = row['list']\n",
"    term_lists_info.append(term_list_dict)\n",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create column list\n",
"column_list = ['pref_ns_prefix', 'pref_ns_uri', 'term_localName', 'label', 'definition', 'usage', 'notes', 'term_modified', 'term_deprecated', 'type']\n",
"if vocab_type == 2:\n",
"    column_list += ['controlled_value_string']\n",
"elif vocab_type == 3:\n",
"    column_list += ['controlled_value_string', 'skos_broader']\n",
"if organized_in_categories:\n",
"    column_list.append('tdwgutility_organizedInClass')\n",
"# every row built below ends with the term version IRI, so that value needs a column name as well\n",
"column_list.append('version_iri')\n",
"\n",
"# Create list of lists metadata table\n",
"table_list = []\n",
"for term_list in term_lists_info:\n",
"    # retrieve versions metadata for term list\n",
"    versions_url = githubBaseUri + term_list['database'] + '-versions/' + term_list['database'] + '-versions.csv'\n",
"    versions_df = pd.read_csv(versions_url, na_filter=False)\n",
"\n",
"    # retrieve current term metadata for term list\n",
"    data_url = githubBaseUri + term_list['database'] + '/' + term_list['database'] + '.csv'\n",
"    frame = pd.read_csv(data_url, na_filter=False)\n",
"    for index,row in frame.iterrows():\n",
"        # the order of the values must match the order of the names in column_list\n",
"        row_list = [term_list['pref_ns_prefix'], term_list['pref_ns_uri'], row['term_localName'], row['label'], row['definition'], row['usage'], row['notes'], row['term_modified'], row['term_deprecated'], row['type']]\n",
"        if vocab_type == 2:\n",
"            row_list += [row['controlled_value_string']]\n",
"        elif vocab_type == 3:\n",
"            if row['skos_broader'] =='':\n",
"                row_list += [row['controlled_value_string'], '']\n",
"            else:\n",
"                # the broader concept is stored as a local name; prefix it to form a CURIE\n",
"                row_list += [row['controlled_value_string'], term_list['pref_ns_prefix'] + ':' + row['skos_broader']]\n",
"        if organized_in_categories:\n",
"            row_list.append(row['tdwgutility_organizedInClass'])\n",
"\n",
"        # Borrowed terms really don't have implemented versions. They may be lacking values for version_status.\n",
"        # In their case, their version IRI will be omitted.\n",
"        version_iri = ''\n",
"        for vindex, vrow in versions_df.iterrows():\n",
"            if vrow['term_localName']==row['term_localName'] and vrow['version_status']=='recommended':\n",
"                version_iri = vrow['version']\n",
"                # NOTE: the current hack for non-TDWG terms without a version is to append # to the end of the term IRI\n",
"                if version_iri.endswith('#'):\n",
"                    version_iri = ''\n",
"        row_list.append(version_iri)\n",
"\n",
"        table_list.append(row_list)\n",
"\n",
"# Turn list of lists into dataframe\n",
"terms_df = pd.DataFrame(table_list, columns = column_list)\n",
"\n",
"terms_sorted_by_label = terms_df.sort_values(by='label')\n",
"terms_sorted_by_localname = terms_df.sort_values(by='term_localName')\n",
"cell_type": "markdown",
"metadata": {},
"source": [
"Run the following cell to generate an index sorted alphabetically by lowercase term local name. Omit this index if the terms have opaque local names."
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# generate the index of terms grouped by category and sorted by term local name\n",
"text = '### 3.1 Index By Term Name\\n\\n'\n",
"text += '(See also [3.2 Index By Label](#32-index-by-label))\\n\\n'\n",
"for category in range(0,len(display_order)):\n",
"    text += '**' + display_label[category] + '**\\n'\n",
"    text += '\\n'\n",
"    if organized_in_categories:\n",
"        filtered_table = terms_sorted_by_localname[terms_sorted_by_localname['tdwgutility_organizedInClass']==display_order[category]]\n",
"    else:\n",
"        filtered_table = terms_sorted_by_localname\n",
"    # renumber the rows on a new frame so the shared sorted frame is not modified in place\n",
"    filtered_table = filtered_table.reset_index(drop=True)\n",
"\n",
"    # one link per line, with ' |' after every entry except the last one\n",
"    links = []\n",
"    for row_index,row in filtered_table.iterrows():\n",
"        curie = row['pref_ns_prefix'] + \":\" + row['term_localName']\n",
"        curie_anchor = curie.replace(':','_')\n",
"        links.append('[' + curie + '](#' + curie_anchor + ')')\n",
"    if links:\n",
"        text += ' |\\n'.join(links) + '\\n'\n",
"    text += '\\n'\n",
"index_by_name = text\n",
"cell_type": "markdown",
"metadata": {},
"source": [
"Run the following cell to generate an index by term label"
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Build the Markdown index of term labels; each entry links to the anchor formed from the namespace prefix and the term local name.\n",
"# Category headings are only written when organized_in_categories is set.\n",
"text = '\\n\\n'\n",
"# Comment out the following two lines if there is no index by local names\n",
"#text = '### 3.2 Index By Label\\n\\n'\n",
"#text += '(See also [3.1 Index By Term Name](#31-index-by-term-name))\\n\\n'\n",
"for category in range(0,len(display_order)):\n",
" if organized_in_categories:\n",
" text += '**' + display_label[category] + '**\\n'\n",
" text += '\\n'\n",
" filtered_table = terms_sorted_by_label[terms_sorted_by_label['tdwgutility_organizedInClass']==display_order[category]]\n",
" filtered_table.reset_index(drop=True, inplace=True)\n",
" else:\n",
" filtered_table = terms_sorted_by_label\n",
" filtered_table.reset_index(drop=True, inplace=True)\n",
" \n",
" for row_index,row in filtered_table.iterrows():\n",
" # a row is skipped when its label equals the label of the row just before it in the label-sorted table\n",
" if row_index == 0 or (row_index != 0 and row['label'] != filtered_table.iloc[row_index - 1].loc['label']): # this is a hack to prevent duplicate labels\n",
" curie_anchor = row['pref_ns_prefix'] + \"_\" + row['term_localName']\n",
" text += '[' + row['label'] + '](#' + curie_anchor + ')'\n",
" # no ' |' separator after the final row, nor after the next-to-last row when the final row repeats its label\n",
" if row_index < len(filtered_table) - 2 or (row_index == len(filtered_table) - 2 and row['label'] != filtered_table.iloc[row_index + 1].loc['label']):\n",
" text += ' |'\n",
" text += '\\n'\n",
" text += '\\n'\n",
"index_by_label = text\n",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): the location of the decisions CSV is an empty string in this copy; supply the real path or URL before running.\n",
"decisions_df = pd.read_csv('', na_filter=False)\n",
"\n",
"# Short names shown in the Type row for the standard RDF, RDFS, and SKOS type IRIs;\n",
"# any other type value is displayed as-is.\n",
"type_display_names = {\n",
"    'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property': 'Property',\n",
"    'http://www.w3.org/2000/01/rdf-schema#Class': 'Class',\n",
"    'http://www.w3.org/2004/02/skos/core#Concept': 'Concept'\n",
"}\n",
"\n",
"def term_table_row(name, value):\n",
"    \"\"\"Return the HTML for one complete two-cell table row: the property name, then its value.\"\"\"\n",
"    return '\\t\\t<tr>\\n\\t\\t\\t<td>' + name + '</td>\\n\\t\\t\\t<td>' + value + '</td>\\n\\t\\t</tr>\\n'\n",
"\n",
"def html_link(target):\n",
"    \"\"\"Return an HTML anchor whose link text is the same string as its target.\"\"\"\n",
"    return '<a href=\"' + target + '\">' + target + '</a>'\n",
"\n",
"# generate a table for each term, with terms grouped by category\n",
"# generate the Markdown for the terms table\n",
"text = '## 4 Vocabulary\\n'\n",
"for category in range(0,len(display_order)):\n",
"    if organized_in_categories:\n",
"        text += '### 4.' + str(category + 1) + ' ' + display_label[category] + '\\n'\n",
"        text += '\\n'\n",
"        text += display_comments[category] # insert the comments for the category, if any.\n",
"        filtered_table = terms_sorted_by_localname[terms_sorted_by_localname['tdwgutility_organizedInClass']==display_order[category]]\n",
"    else:\n",
"        filtered_table = terms_sorted_by_localname\n",
"    # renumber the rows on a new frame so the shared sorted frame is not modified in place\n",
"    filtered_table = filtered_table.reset_index(drop=True)\n",
"\n",
"    for row_index,row in filtered_table.iterrows():\n",
"        curie = row['pref_ns_prefix'] + \":\" + row['term_localName']\n",
"        curieAnchor = curie.replace(':','_')\n",
"        uri = row['pref_ns_uri'] + row['term_localName']\n",
"\n",
"        # the header row carries the anchor that the index entries link to\n",
"        text += '<table>\\n'\n",
"        text += '\\t<thead>\\n'\n",
"        text += '\\t\\t<tr>\\n'\n",
"        text += '\\t\\t\\t<th colspan=\"2\"><a id=\"' + curieAnchor + '\"></a>Term Name ' + curie + '</th>\\n'\n",
"        text += '\\t\\t</tr>\\n'\n",
"        text += '\\t</thead>\\n'\n",
"        text += '\\t<tbody>\\n'\n",
"\n",
"        text += term_table_row('Term IRI', html_link(uri))\n",
"        # this row previously lacked its opening <tr> tag\n",
"        text += term_table_row('Modified', row['term_modified'])\n",
"        if row['version_iri'] != '':\n",
"            text += term_table_row('Term version IRI', html_link(row['version_iri']))\n",
"        text += term_table_row('Label', row['label'])\n",
"        if row['term_deprecated'] != '':\n",
"            text += term_table_row('', '<strong>This term is deprecated and should no longer be used.</strong>')\n",
"        text += term_table_row('Definition', row['definition'])\n",
"        if row['usage'] != '':\n",
"            text += term_table_row('Usage', createLinks(row['usage']))\n",
"        if row['notes'] != '':\n",
"            text += term_table_row('Notes', createLinks(row['notes']))\n",
"        if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
"            text += term_table_row('Controlled value', row['controlled_value_string'])\n",
"        if vocab_type == 3 and row['skos_broader'] != '': # controlled vocabulary with skos:broader relationships\n",
"            curieAnchor = row['skos_broader'].replace(':','_')\n",
"            text += term_table_row('Has broader concept', '<a href=\"#' + curieAnchor + '\">' + row['skos_broader'] + '</a>')\n",
"        text += term_table_row('Type', type_display_names.get(row['type'], row['type']))\n",
"\n",
"        # Look up decisions related to this term\n",
"        related_decisions = decisions_df[decisions_df['linked_affected_resource'] == uri]\n",
"        for drow_index,drow in related_decisions.iterrows():\n",
"            text += term_table_row('Executive Committee decision', html_link(drow['decision_localName']))\n",
"\n",
"        text += '\\t</tbody>\\n'\n",
"        text += '</table>\\n'\n",
"        text += '\\n'\n",
"    text += '\\n'\n",
"term_table = text\n",
"cell_type": "markdown",
"metadata": {},
"source": [
"Modify to display the indices that you want"
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Assemble the page body from the label index and the term tables.\n",
"# Use the commented-out line instead to also include the index by term name.\n",
"text = index_by_label + term_table\n",
"#text = index_by_name + index_by_label + term_table"
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# read in header and footer, merge with terms table, and output\n",
"# each with block closes its file as soon as the read or write has finished\n",
"with open(headerFileName, 'rt', encoding='utf-8') as headerObject:\n",
"    header = headerObject.read()\n",
"with open(footerFileName, 'rt', encoding='utf-8') as footerObject:\n",
"    footer = footerObject.read()\n",
"output = header + text + footer\n",
"with open(outFileName, 'wt', encoding='utf-8') as outputObject:\n",
"    outputObject.write(output)\n",
" \n",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
"nbformat": 4,
"nbformat_minor": 2

@ -0,0 +1,55 @@
# Degree of Establishment Controlled Vocabulary List of Terms
: Degree of Establishment Controlled Vocabulary List of Terms
Namespace URI
: <>
Preferred namespace abbreviation
: dwcdoe:
Date version issued
: 2020-10-13
Date created
: 2020-10-13
Part of TDWG Standard
: <>
This document version
: <>
Latest version of document
: <>
: The Darwin Core term `degreeOfEstablishment` provides information about the degree to which an Organism survives, reproduces, and expands its range at the given place and time. The Degree of Establishment Controlled Vocabulary provides terms that should be used as values for `dwc:degreeOfEstablishment` and `dwciri:degreeOfEstablishment`.
: Quentin Groom, Peter Desmet, Lien Reyserhove, Tim Adriaens, Damiano Oldoni, Sonia Vanderhoeven, Steven J Baskauf, Arthur Chapman, Melodie McGeoch, Ramona Walls, John Wieczorek, John R.U. Wilson, Paula F F Zermoglio, Annie Simpson
: TDWG Darwin Core Maintenance Group
Bibliographic citation
: Darwin Core Maintenance Group. 2020. Degree of Establishment Controlled Vocabulary List of Terms. Biodiversity Information Standards (TDWG). <>
## 1 Introduction
This document includes terms intended to be used as a controlled value for Darwin Core terms with local name `degreeOfEstablishment`. For details and rationale, see Groom et al. 2019. Improving Darwin Core for research and management of alien species. <>
### 1.1 Status of the content of this document
In Section 4, the values of the `Term IRI`, `Definition`, and `Controlled value` are normative. The value of `Usage` (if it exists for a given term) is normative. The values of `Term Name` are non-normative, although one can expect that the namespace abbreviation prefix is one commonly used for the term namespace. `Label` and the values of all other properties (such as `Notes`) are non-normative.
### 1.2 RFC 2119 key words
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119).
## 2 Use of Terms
Due to the requirements of Section 1.4.3 of the Darwin Core RDF Guide, term IRIs MUST be used as values of `dwciri:degreeOfEstablishment`. Controlled value strings MUST be used as values of `dwc:degreeOfEstablishment`.
## 3 Term index

@ -0,0 +1,55 @@
# Establishment Means Controlled Vocabulary List of Terms
: Establishment Means Controlled Vocabulary List of Terms
Namespace URI
: <>
Preferred namespace abbreviation
: dwcem:
Date version issued
: 2020-10-13
Date created
: 2020-10-13
Part of TDWG Standard
: <>
This document version
: <>
Latest version of document
: <>
: The Darwin Core term `establishmentMeans` provides information about whether an organism or organisms have been introduced to a given place and time through the direct or indirect activity of modern humans. The Establishment Means Controlled Vocabulary provides terms that should be used as values for `dwc:establishmentMeans` and `dwciri:establishmentMeans`.
: Quentin Groom, Peter Desmet, Lien Reyserhove, Tim Adriaens, Damiano Oldoni, Sonia Vanderhoeven, Steven J Baskauf, Arthur Chapman, Melodie McGeoch, Ramona Walls, John Wieczorek, John R.U. Wilson, Paula F F Zermoglio, Annie Simpson
: TDWG Darwin Core Maintenance Group
Bibliographic citation
: Darwin Core Maintenance Group. 2020. Establishment Means Controlled Vocabulary List of Terms. Biodiversity Information Standards (TDWG). <>
## 1 Introduction
This document includes terms intended to be used as a controlled value for Darwin Core terms with local name `establishmentMeans`. For details and rationale, see Groom et al. 2019. Improving Darwin Core for research and management of alien species. <>
### 1.1 Status of the content of this document
In Section 4, the values of the `Term IRI`, `Definition`, and `Controlled value` are normative. The value of `Usage` (if it exists for a given term) is normative. The values of `Term Name` are non-normative, although one can expect that the namespace abbreviation prefix is one commonly used for the term namespace. `Label` and the values of all other properties (such as `Notes`) are non-normative.
### 1.2 RFC 2119 key words
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119).
## 2 Use of Terms
Due to the requirements of Section 1.4.3 of the Darwin Core RDF Guide, term IRIs MUST be used as values of `dwciri:establishmentMeans`. Controlled value strings MUST be used as values of `dwc:establishmentMeans`.
## 3 Term index

@ -0,0 +1,55 @@
# Pathway Controlled Vocabulary List of Terms
: Pathway Controlled Vocabulary List of Terms
Namespace URI
: <>
Preferred namespace abbreviation
: dwcpw:
Date version issued
: 2020-10-13
Date created
: 2020-10-13
Part of TDWG Standard
: <>
This document version
: <>
Latest version of document
: <>
: The Darwin Core term `pathway` provides information about the process by which an Organism came to be in a given place at a given time. The Pathway Controlled Vocabulary provides terms that should be used as values for `dwc:pathway` and `dwciri:pathway`.
: Quentin Groom, Peter Desmet, Lien Reyserhove, Tim Adriaens, Damiano Oldoni, Sonia Vanderhoeven, Steven J Baskauf, Arthur Chapman, Melodie McGeoch, Ramona Walls, John Wieczorek, John R.U. Wilson, Paula F F Zermoglio, Annie Simpson
: TDWG Darwin Core Maintenance Group
Bibliographic citation
: Darwin Core Maintenance Group. 2020. Pathway Controlled Vocabulary List of Terms. Biodiversity Information Standards (TDWG). <>
## 1 Introduction
This document includes terms intended to be used as a controlled value for Darwin Core terms with local name `pathway`. For details and rationale, see Groom et al. 2019. Improving Darwin Core for research and management of alien species. <>
### 1.1 Status of the content of this document
In Section 4, the values of the `Term IRI`, `Definition`, and `Controlled value` are normative. The value of `Usage` (if it exists for a given term) is normative. The value of `Has broader concept` is normative. The values of `Term Name` are non-normative, although one can expect that the namespace abbreviation prefix is one commonly used for the term namespace. `Label` and the values of all other properties (such as `Notes`) are non-normative.
### 1.2 RFC 2119 key words
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119).
## 2 Use of Terms
Due to the requirements of Section 1.4.3 of the Darwin Core RDF Guide, term IRIs MUST be used as values of `dwciri:pathway`. Controlled value strings MUST be used as values of `dwc:pathway`.
## 3 Term index