move list of terms build script into dwc repo and run to generate page

2020-08-12 14:40:36 -05:00 · 2020-08-12 14:40:36 -05:00 · b16d8931a1
parent 1d52ee818d
commit b16d8931a1
4 changed files with 14404 additions and 0 deletions
--- a/build/build-termlist.ipynb
+++ b/build/build-termlist.ipynb
@ -0,0 +1,581 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Script to build Markdown pages that provide term metadata for complex vocabularies\n",
+    "# Steve Baskauf 2020-08-12 CC0\n",
+    "# This script merges static Markdown header and footer documents with term information tables (in Markdown) generated from data in the rs.tdwg.org repo on the TDWG GitHub site\n",
+    "\n",
+    "import re\n",
+    "import requests   # best library to manage HTTP transactions\n",
+    "import csv        # library to read/write/parse CSV files\n",
+    "import json       # library to convert JSON to Python data structures\n",
+    "import pandas as pd\n",
+    "\n",
+    "# -----------------\n",
+    "# Configuration section\n",
+    "# -----------------\n",
+    "\n",
+    "# !!!! NOTE !!!!\n",
+    "# There is not currently an example of a complex vocabulary that has the column headers\n",
+    "# used in the sample files. In order to test this script, it uses the Audubon Core files,\n",
+    "# which have headers that differ from the samples. So throughout the code, there are\n",
+    "# pairs of lines where the default header names are commented out and the Audubon Core\n",
+    "# headers are not. To build a page using the sample files, you will need to reverse the\n",
+    "# commenting of these pairs.\n",
+    "\n",
+    "# This is the base URL for raw files from the branch of the repo that has been pushed to GitHub\n",
+    "githubBaseUri = 'https://raw.githubusercontent.com/tdwg/rs.tdwg.org/master/'\n",
+    "\n",
+    "headerFileName = 'termlist-header.md'\n",
+    "footerFileName = 'termlist-footer.md'\n",
+    "outFileName = '../docs/list/index.md'\n",
+    "\n",
+    "# This is a Python list of the database names of the term lists to be included in the document.\n",
+    "termLists = ['terms', 'iri', 'dc-for-dwc', 'dcterms-for-dwc']\n",
+    "#termLists = ['pathway']\n",
+    "\n",
+    "# NOTE! There may be problems unless every term list is of the same vocabulary type since the number of columns will differ\n",
+    "# However, there probably aren't any circumstances where mixed types will be used to generate the same page.\n",
+    "vocab_type = 1 # 1 is simple vocabulary, 2 is simple controlled vocabulary, 3 is c.v. with broader hierarchy\n",
+    "\n",
+    "# Terms in large vocabularies like Darwin and Audubon Cores may be organized into categories using tdwgutility_organizedInClass\n",
+    "# If so, those categories can be used to group terms in the generated term list document.\n",
+    "organized_in_categories = True\n",
+    "\n",
+    "# If organized in categories, the display_order list must contain the IRIs that are values of tdwgutility_organizedInClass\n",
+    "# If not organized into categories, the value is irrelevant. There just needs to be one item in the list.\n",
+    "display_order = ['', 'http://purl.org/dc/elements/1.1/', 'http://purl.org/dc/terms/', 'http://rs.tdwg.org/dwc/terms/Occurrence', 'http://rs.tdwg.org/dwc/terms/Organism', 'http://rs.tdwg.org/dwc/terms/MaterialSample', 'http://rs.tdwg.org/dwc/terms/Event', 'http://purl.org/dc/terms/Location', 'http://rs.tdwg.org/dwc/terms/GeologicalContext', 'http://rs.tdwg.org/dwc/terms/Identification', 'http://rs.tdwg.org/dwc/terms/Taxon', 'http://rs.tdwg.org/dwc/terms/MeasurementOrFact', 'http://rs.tdwg.org/dwc/terms/ResourceRelationship', 'http://rs.tdwg.org/dwc/terms/attributes/UseWithIRI']\n",
+    "display_label = ['Record level', 'Dublin Core legacy namespace', 'Dublin Core terms namespace', 'Occurrence', 'Organism', 'Material Sample', 'Event', 'Location', 'Geological Context', 'Identification', 'Taxon', 'Measurement or Fact', 'Resource Relationship', 'IRI-value terms']\n",
+    "display_comments = ['','','','','','','','','','','','','','']\n",
+    "display_id = ['record_level', 'dc', 'dcterms', 'occurrence', 'organism', 'material_sample', 'event', 'location', 'geological_context', 'identification', 'taxon', 'measurement_or_fact', 'resource_relationship', 'use_with_iri']\n",
+    "\n",
+    "#display_order = ['']\n",
+    "#display_label = ['Vocabulary'] # these are the section labels for the categories in the page\n",
+    "#display_comments = [''] # these are the comments about the category to be appended following the section labels\n",
+    "#display_id = ['Vocabulary'] # these are the fragment identifiers for the associated sections for the categories\n",
+    "\n",
+    "# ---------------\n",
+    "# Function definitions\n",
+    "# ---------------\n",
+    "\n",
+    "# replace URL with link\n",
+    "#\n",
+    "def createLinks(text):\n",
+    "    def repl(match):\n",
+    "        if match.group(1)[-1] == '.':\n",
+    "            return '<a href=\"' + match.group(1)[:-1] + '\">' + match.group(1)[:-1] + '</a>.'\n",
+    "        return '<a href=\"' + match.group(1) + '\">' + match.group(1) + '</a>'\n",
+    "\n",
+    "    pattern = '(https?://[^\\s,;\\)\"]*)'\n",
+    "    result = re.sub(pattern, repl, text)\n",
+    "    return result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Retrieving term list metadata from GitHub\n",
+      "[{'database': 'terms', 'pref_ns_prefix': 'dwc', 'pref_ns_uri': 'http://rs.tdwg.org/dwc/terms/', 'list_iri': 'http://rs.tdwg.org/dwc/terms/'}, {'database': 'iri', 'pref_ns_prefix': 'dwciri', 'pref_ns_uri': 'http://rs.tdwg.org/dwc/iri/', 'list_iri': 'http://rs.tdwg.org/dwc/iri/'}, {'database': 'dc-for-dwc', 'pref_ns_prefix': 'dc', 'pref_ns_uri': 'http://purl.org/dc/elements/1.1/', 'list_iri': 'http://rs.tdwg.org/dwc/dc/'}, {'database': 'dcterms-for-dwc', 'pref_ns_prefix': 'dcterms', 'pref_ns_uri': 'http://purl.org/dc/terms/', 'list_iri': 'http://rs.tdwg.org/dwc/dcterms/'}]\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ---------------\n",
+    "# Retrieve term list metadata from GitHub\n",
+    "# ---------------\n",
+    "\n",
+    "print('Retrieving term list metadata from GitHub')\n",
+    "term_lists_info = []\n",
+    "\n",
+    "frame = pd.read_csv(githubBaseUri + 'term-lists/term-lists.csv', na_filter=False)\n",
+    "for termList in termLists:\n",
+    "    term_list_dict = {'list_iri': termList}\n",
+    "    term_list_dict = {'database': termList}\n",
+    "    for index,row in frame.iterrows():\n",
+    "        if row['database'] == termList:\n",
+    "            term_list_dict['pref_ns_prefix'] = row['vann_preferredNamespacePrefix']\n",
+    "            term_list_dict['pref_ns_uri'] = row['vann_preferredNamespaceUri']\n",
+    "            term_list_dict['list_iri'] = row['list']\n",
+    "    term_lists_info.append(term_list_dict)\n",
+    "print(term_lists_info)\n",
+    "print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Retrieving metadata about terms from all namespaces from GitHub\n",
+      "done retrieving\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ---------------\n",
+    "# Create metadata table and populate using data from namespace databases in GitHub\n",
+    "# ---------------\n",
+    "\n",
+    "# Create column list\n",
+    "column_list = ['pref_ns_prefix', 'pref_ns_uri', 'term_localName', 'label', 'rdfs_comment', 'dcterms_description', 'examples', 'term_modified', 'term_deprecated', 'rdf_type', 'replaces_term', 'replaces1_term']\n",
+    "#column_list = ['pref_ns_prefix', 'pref_ns_uri', 'term_localName', 'label', 'definition', 'usage', 'notes', 'term_modified', 'term_deprecated', 'type']\n",
+    "if vocab_type == 2:\n",
+    "    column_list += ['controlled_value_string']\n",
+    "elif vocab_type == 3:\n",
+    "    column_list += ['controlled_value_string', 'skos_broader']\n",
+    "if organized_in_categories:\n",
+    "    column_list.append('tdwgutility_organizedInClass')\n",
+    "column_list.append('version_iri')\n",
+    "\n",
+    "print('Retrieving metadata about terms from all namespaces from GitHub')\n",
+    "# Create list of lists metadata table\n",
+    "table_list = []\n",
+    "for term_list in term_lists_info:\n",
+    "    # retrieve versions metadata for term list\n",
+    "    versions_url = githubBaseUri + term_list['database'] + '-versions/' + term_list['database'] + '-versions.csv'\n",
+    "    versions_df = pd.read_csv(versions_url, na_filter=False)\n",
+    "    \n",
+    "    # retrieve current term metadata for term list\n",
+    "    data_url = githubBaseUri + term_list['database'] + '/' + term_list['database'] + '.csv'\n",
+    "    frame = pd.read_csv(data_url, na_filter=False)\n",
+    "    for index,row in frame.iterrows():\n",
+    "        row_list = [term_list['pref_ns_prefix'], term_list['pref_ns_uri'], row['term_localName'], row['label'], row['rdfs_comment'], row['dcterms_description'], row['examples'], row['term_modified'], row['term_deprecated'], row['rdf_type'], row['replaces_term'], row['replaces1_term']]\n",
+    "        #row_list = [term_list['pref_ns_prefix'], term_list['pref_ns_uri'], row['term_localName'], row['label'], row['definition'], row['usage'], row['notes'], row['term_modified'], row['term_deprecated'], row['type']]\n",
+    "        if vocab_type == 2:\n",
+    "            row_list += [row['controlled_value_string']]\n",
+    "        elif vocab_type == 3:\n",
+    "            if row['skos_broader'] =='':\n",
+    "                row_list += [row['controlled_value_string'], '']\n",
+    "            else:\n",
+    "                row_list += [row['controlled_value_string'], term_list['pref_ns_prefix'] + ':' + row['skos_broader']]\n",
+    "        if organized_in_categories:\n",
+    "            row_list.append(row['tdwgutility_organizedInClass'])\n",
+    "\n",
+    "        # Borrowed terms really don't have implemented versions. They may be lacking values for version_status.\n",
+    "        # In their case, their version IRI will be omitted.\n",
+    "        found = False\n",
+    "        for vindex, vrow in versions_df.iterrows():\n",
+    "            if vrow['term_localName']==row['term_localName'] and vrow['version_status']=='recommended':\n",
+    "                found = True\n",
+    "                version_iri = vrow['version']\n",
+    "                # NOTE: the current hack for non-TDWG terms without a version is to append # to the end of the term IRI\n",
+    "                if version_iri[len(version_iri)-1] == '#':\n",
+    "                    version_iri = ''\n",
+    "        if not found:\n",
+    "            version_iri = ''\n",
+    "        row_list.append(version_iri)\n",
+    "\n",
+    "        table_list.append(row_list)\n",
+    "\n",
+    "# Turn list of lists into dataframe\n",
+    "terms_df = pd.DataFrame(table_list, columns = column_list)\n",
+    "\n",
+    "terms_sorted_by_label = terms_df.sort_values(by='label')\n",
+    "#terms_sorted_by_localname = terms_df.sort_values(by='term_localName')\n",
+    "\n",
+    "# This makes sort case insensitive\n",
+    "terms_sorted_by_localname = terms_df.iloc[terms_df.term_localName.str.lower().argsort()]\n",
+    "#terms_sorted_by_localname\n",
+    "print('done retrieving')\n",
+    "print()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Run the following cell to generate an index sorted alphabetically by lowercase term local name. Omit this index if the terms have opaque local names."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generating term index by CURIE\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ---------------\n",
+    "# generate the index of terms grouped by category and sorted alphabetically by lowercase term local name\n",
+    "# ---------------\n",
+    "\n",
+    "print('Generating term index by CURIE')\n",
+    "text = '### 3.1 Index By Term Name\\n\\n'\n",
+    "text += '(See also [3.2 Index By Label](#32-index-by-label))\\n\\n'\n",
+    "\n",
+    "text += '**Classes**\\n'\n",
+    "text += '\\n'\n",
+    "for row_index,row in terms_sorted_by_localname.iterrows():\n",
+    "    if row['rdf_type'] == 'http://www.w3.org/2000/01/rdf-schema#Class':\n",
+    "        curie = row['pref_ns_prefix'] + \":\" + row['term_localName']\n",
+    "        curie_anchor = curie.replace(':','_')\n",
+    "        text += '[' + curie + '](#' + curie_anchor + ')'\n",
+    "        if row_index < len(terms_sorted_by_localname) - 1:\n",
+    "            text += ' |'\n",
+    "        text += '\\n'\n",
+    "text += '\\n'\n",
+    "\n",
+    "for category in range(0,len(display_order)):\n",
+    "    text += '**' + display_label[category] + '**\\n'\n",
+    "    text += '\\n'\n",
+    "    if organized_in_categories:\n",
+    "        filtered_table = terms_sorted_by_localname[terms_sorted_by_localname['tdwgutility_organizedInClass']==display_order[category]]\n",
+    "        filtered_table.reset_index(drop=True, inplace=True)\n",
+    "    else:\n",
+    "        filtered_table = terms_sorted_by_localname\n",
+    "        \n",
+    "    for row_index,row in filtered_table.iterrows():\n",
+    "        if row['rdf_type'] != 'http://www.w3.org/2000/01/rdf-schema#Class':\n",
+    "            curie = row['pref_ns_prefix'] + \":\" + row['term_localName']\n",
+    "            curie_anchor = curie.replace(':','_')\n",
+    "            text += '[' + curie + '](#' + curie_anchor + ')'\n",
+    "            if row_index < len(filtered_table) - 1:\n",
+    "                text += ' |'\n",
+    "            text += '\\n'\n",
+    "    text += '\\n'\n",
+    "index_by_name = text\n",
+    "\n",
+    "#print(index_by_name)\n",
+    "print()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Run the following cell to generate an index by term label"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generating term index by label\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ---------------\n",
+    "# generate the index of terms by label\n",
+    "# ---------------\n",
+    "\n",
+    "print('Generating term index by label')\n",
+    "text = '\\n\\n'\n",
+    "\n",
+    "# Comment out the following two lines if there is no index by local names\n",
+    "text = '### 3.2 Index By Label\\n\\n'\n",
+    "text += '(See also [3.1 Index By Term Name](#31-index-by-term-name))\\n\\n'\n",
+    "\n",
+    "text += '**Classes**\\n'\n",
+    "text += '\\n'\n",
+    "for row_index,row in terms_sorted_by_label.iterrows():\n",
+    "    if row['rdf_type'] == 'http://www.w3.org/2000/01/rdf-schema#Class':\n",
+    "        curie_anchor = row['pref_ns_prefix'] + \"_\" + row['term_localName']\n",
+    "        text += '[' + row['label'] + '](#' + curie_anchor + ')'\n",
+    "        if row_index < len(terms_sorted_by_label) - 1:\n",
+    "            text += ' |'\n",
+    "        text += '\\n'\n",
+    "text += '\\n'\n",
+    "\n",
+    "for category in range(0,len(display_order)):\n",
+    "    if organized_in_categories:\n",
+    "        text += '**' + display_label[category] + '**\\n'\n",
+    "        text += '\\n'\n",
+    "        filtered_table = terms_sorted_by_label[terms_sorted_by_label['tdwgutility_organizedInClass']==display_order[category]]\n",
+    "        filtered_table.reset_index(drop=True, inplace=True)\n",
+    "    else:\n",
+    "        filtered_table = terms_sorted_by_label\n",
+    "        \n",
+    "    for row_index,row in filtered_table.iterrows():\n",
+    "        if row_index == 0 or (row_index != 0 and row['label'] != filtered_table.iloc[row_index - 1].loc['label']): # this is a hack to prevent duplicate labels\n",
+    "            if row['rdf_type'] != 'http://www.w3.org/2000/01/rdf-schema#Class':\n",
+    "                curie_anchor = row['pref_ns_prefix'] + \"_\" + row['term_localName']\n",
+    "                text += '[' + row['label'] + '](#' + curie_anchor + ')'\n",
+    "                if row_index < len(filtered_table) - 2 or (row_index == len(filtered_table) - 2 and row['label'] != filtered_table.iloc[row_index + 1].loc['label']):\n",
+    "                    text += ' |'\n",
+    "                text += '\\n'\n",
+    "    text += '\\n'\n",
+    "index_by_label = text\n",
+    "print()\n",
+    "\n",
+    "#print(index_by_label)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generating terms table\n",
+      "done generating\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "decisions_df = pd.read_csv('https://raw.githubusercontent.com/tdwg/rs.tdwg.org/master/decisions/decisions-links.csv', na_filter=False)\n",
+    "\n",
+    "# ---------------\n",
+    "# generate a table for each term, with terms grouped by category\n",
+    "# ---------------\n",
+    "\n",
+    "print('Generating terms table')\n",
+    "# generate the Markdown for the terms table\n",
+    "text = '## 4 Vocabulary\\n'\n",
+    "if True:\n",
+    "    filtered_table = terms_sorted_by_localname\n",
+    "\n",
+    "#for category in range(0,len(display_order)):\n",
+    "#    if organized_in_categories:\n",
+    "#        text += '### 4.' + str(category + 1) + ' ' + display_label[category] + '\\n'\n",
+    "#        text += '\\n'\n",
+    "#        text += display_comments[category] # insert the comments for the category, if any.\n",
+    "#        filtered_table = terms_sorted_by_localname[terms_sorted_by_localname['tdwgutility_organizedInClass']==display_order[category]]\n",
+    "#        filtered_table.reset_index(drop=True, inplace=True)\n",
+    "#    else:\n",
+    "#        filtered_table = terms_sorted_by_localname\n",
+    "\n",
+    "    for row_index,row in filtered_table.iterrows():\n",
+    "        text += '<table>\\n'\n",
+    "        curie = row['pref_ns_prefix'] + \":\" + row['term_localName']\n",
+    "        curieAnchor = curie.replace(':','_')\n",
+    "        text += '\\t<thead>\\n'\n",
+    "        text += '\\t\\t<tr>\\n'\n",
+    "        text += '\\t\\t\\t<th colspan=\"2\"><a id=\"' + curieAnchor + '\"></a>Term Name  ' + curie + '</th>\\n'\n",
+    "        text += '\\t\\t</tr>\\n'\n",
+    "        text += '\\t</thead>\\n'\n",
+    "        text += '\\t<tbody>\\n'\n",
+    "        text += '\\t\\t<tr>\\n'\n",
+    "        text += '\\t\\t\\t<td>Term IRI</td>\\n'\n",
+    "        uri = row['pref_ns_uri'] + row['term_localName']\n",
+    "        text += '\\t\\t\\t<td><a href=\"' + uri + '\">' + uri + '</a></td>\\n'\n",
+    "        text += '\\t\\t</tr>\\n'\n",
+    "        text += '\\t\\t\\t<td>Modified</td>\\n'\n",
+    "        text += '\\t\\t\\t<td>' + row['term_modified'] + '</td>\\n'\n",
+    "        text += '\\t\\t</tr>\\n'\n",
+    "\n",
+    "        if row['version_iri'] != '':\n",
+    "            text += '\\t\\t<tr>\\n'\n",
+    "            text += '\\t\\t\\t<td>Term version IRI</td>\\n'\n",
+    "            text += '\\t\\t\\t<td><a href=\"' + row['version_iri'] + '\">' + row['version_iri'] + '</a></td>\\n'\n",
+    "            text += '\\t\\t</tr>\\n'\n",
+    "\n",
+    "        text += '\\t\\t<tr>\\n'\n",
+    "        text += '\\t\\t\\t<td>Label</td>\\n'\n",
+    "        text += '\\t\\t\\t<td>' + row['label'] + '</td>\\n'\n",
+    "        text += '\\t\\t</tr>\\n'\n",
+    "\n",
+    "        if row['term_deprecated'] != '':\n",
+    "            text += '\\t\\t<tr>\\n'\n",
+    "            text += '\\t\\t\\t<td></td>\\n'\n",
+    "            text += '\\t\\t\\t<td><strong>This term is deprecated and should no longer be used.</strong></td>\\n'\n",
+    "            text += '\\t\\t</tr>\\n'\n",
+    "\n",
+    "            for dep_index,dep_row in filtered_table.iterrows():\n",
+    "                if dep_row['replaces_term'] == uri:\n",
+    "                    text += '\\t\\t<tr>\\n'\n",
+    "                    text += '\\t\\t\\t<td>Is replaced by</td>\\n'\n",
+    "                    text += '\\t\\t\\t<td><a href=\"#' + dep_row['pref_ns_prefix'] + \"_\" + dep_row['term_localName'] + '\">' + dep_row['pref_ns_uri'] + dep_row['term_localName'] + '</a></td>\\n'\n",
+    "                    text += '\\t\\t</tr>\\n'\n",
+    "                if dep_row['replaces1_term'] == uri:\n",
+    "                    text += '\\t\\t<tr>\\n'\n",
+    "                    text += '\\t\\t\\t<td>Is replaced by</td>\\n'\n",
+    "                    text += '\\t\\t\\t<td><a href=\"#' + dep_row['pref_ns_prefix'] + \"_\" + dep_row['term_localName'] + '\">' + dep_row['pref_ns_uri'] + dep_row['term_localName'] + '</a></td>\\n'\n",
+    "                    text += '\\t\\t</tr>\\n'\n",
+    "\n",
+    "        text += '\\t\\t<tr>\\n'\n",
+    "        text += '\\t\\t\\t<td>Definition</td>\\n'\n",
+    "        text += '\\t\\t\\t<td>' + row['rdfs_comment'] + '</td>\\n'\n",
+    "        #text += '\\t\\t\\t<td>' + row['definition'] + '</td>\\n'\n",
+    "        text += '\\t\\t</tr>\\n'\n",
+    "\n",
+    "        if row['dcterms_description'] != '':\n",
+    "        #if row['notes'] != '':\n",
+    "            text += '\\t\\t<tr>\\n'\n",
+    "            text += '\\t\\t\\t<td>Notes</td>\\n'\n",
+    "            text += '\\t\\t\\t<td>' + createLinks(row['dcterms_description']) + '</td>\\n'\n",
+    "            #text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n",
+    "            text += '\\t\\t</tr>\\n'\n",
+    "\n",
+    "        if row['examples'] != '':\n",
+    "        #if row['usage'] != '':\n",
+    "            text += '\\t\\t<tr>\\n'\n",
+    "            text += '\\t\\t\\t<td>Examples</td>\\n'\n",
+    "            text += '\\t\\t\\t<td>' + createLinks(row['examples']) + '</td>\\n'\n",
+    "            #text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n",
+    "            text += '\\t\\t</tr>\\n'\n",
+    "\n",
+    "        if vocab_type == 2 or vocab_type ==3: # controlled vocabulary\n",
+    "            text += '\\t\\t<tr>\\n'\n",
+    "            text += '\\t\\t\\t<td>Controlled value</td>\\n'\n",
+    "            text += '\\t\\t\\t<td>' + row['controlled_value_string'] + '</td>\\n'\n",
+    "            text += '\\t\\t</tr>\\n'\n",
+    "\n",
+    "        if vocab_type == 3 and row['skos_broader'] != '': # controlled vocabulary with skos:broader relationships\n",
+    "            text += '\\t\\t<tr>\\n'\n",
+    "            text += '\\t\\t\\t<td>Has broader concept</td>\\n'\n",
+    "            curieAnchor = row['skos_broader'].replace(':','_')\n",
+    "            text += '\\t\\t\\t<td><a href=\"#' + curieAnchor + '\">' + row['skos_broader'] + '</a></td>\\n'\n",
+    "            text += '\\t\\t</tr>\\n'\n",
+    "\n",
+    "        text += '\\t\\t<tr>\\n'\n",
+    "        text += '\\t\\t\\t<td>Type</td>\\n'\n",
+    "        if row['rdf_type'] == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property':\n",
+    "        #if row['type'] == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property':\n",
+    "            text += '\\t\\t\\t<td>Property</td>\\n'\n",
+    "        elif row['rdf_type'] == 'http://www.w3.org/2000/01/rdf-schema#Class':\n",
+    "        #elif row['type'] == 'http://www.w3.org/2000/01/rdf-schema#Class':\n",
+    "            text += '\\t\\t\\t<td>Class</td>\\n'\n",
+    "        elif row['rdf_type'] == 'http://www.w3.org/2004/02/skos/core#Concept':\n",
+    "        #elif row['type'] == 'http://www.w3.org/2004/02/skos/core#Concept':\n",
+    "            text += '\\t\\t\\t<td>Concept</td>\\n'\n",
+    "        else:\n",
+    "            text += '\\t\\t\\t<td>' + row['rdf_type'] + '</td>\\n' # this should rarely happen\n",
+    "            #text += '\\t\\t\\t<td>' + row['type'] + '</td>\\n' # this should rarely happen\n",
+    "        text += '\\t\\t</tr>\\n'\n",
+    "\n",
+    "        # Look up decisions related to this term\n",
+    "        for drow_index,drow in decisions_df.iterrows():\n",
+    "            if drow['linked_affected_resource'] == uri:\n",
+    "                text += '\\t\\t<tr>\\n'\n",
+    "                text += '\\t\\t\\t<td>Executive Committee decision</td>\\n'\n",
+    "                text += '\\t\\t\\t<td><a href=\"http://rs.tdwg.org/decisions/' + drow['decision_localName'] + '\">http://rs.tdwg.org/decisions/' + drow['decision_localName'] + '</a></td>\\n'\n",
+    "                text += '\\t\\t</tr>\\n'                        \n",
+    "\n",
+    "        text += '\\t</tbody>\\n'\n",
+    "        text += '</table>\\n'\n",
+    "        text += '\\n'\n",
+    "    text += '\\n'\n",
+    "term_table = text\n",
+    "print('done generating')\n",
+    "print()\n",
+    "\n",
+    "#print(term_table)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Modify the following cell to choose which indices are included in the output."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Merging term table with header and footer and saving file\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ---------------\n",
+    "# Merge term table with header and footer Markdown, then save file\n",
+    "# ---------------\n",
+    "\n",
+    "print('Merging term table with header and footer and saving file')\n",
+    "#text = index_by_label + term_table\n",
+    "text = index_by_name + index_by_label + term_table"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "done\n"
+     ]
+    }
+   ],
+   "source": [
+    "# read in header and footer, merge with terms table, and output\n",
+    "\n",
+    "headerObject = open(headerFileName, 'rt', encoding='utf-8')\n",
+    "header = headerObject.read()\n",
+    "headerObject.close()\n",
+    "\n",
+    "footerObject = open(footerFileName, 'rt', encoding='utf-8')\n",
+    "footer = footerObject.read()\n",
+    "footerObject.close()\n",
+    "\n",
+    "output = header + text + footer\n",
+    "outputObject = open(outFileName, 'wt', encoding='utf-8')\n",
+    "outputObject.write(output)\n",
+    "outputObject.close()\n",
+    "    \n",
+    "print('done')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/build/termlist-footer.md
+++ b/build/termlist-footer.md
--- a/build/termlist-header.md
+++ b/build/termlist-header.md
@ -0,0 +1,66 @@
+# List of Darwin Core terms
+
+Title
+: List of Darwin Core terms
+
+Date version issued
+: 2020-08-12
+
+Date created
+: 2020-08-12
+
+Part of TDWG Standard
+: <http://www.tdwg.org/standards/450>
+
+This version
+: <http://rs.tdwg.org/dwc/doc/list/2020-08-12>
+
+Latest version
+: <http://rs.tdwg.org/dwc/doc/list/>
+
+Abstract
+: Darwin Core is a vocabulary standard for transmitting information about biodiversity. This document lists all terms in namespaces currently used in the vocabulary.
+
+Contributors
+: John Wieczorek (VertNet), Peter Desmet (INBO), Steve Baskauf (TDWG RDF/OWL Task Group), Tim Robertson (GBIF), Markus Döring (GBIF), Quentin Groom (Botanic Garden Meise), Stijn Van Hoey (INBO), David Bloom (VertNet), Paula Zermoglio (VertNet), Robert Guralnick (University of Florida), John Deck (Genomic Biodiversity Working Group), Gail Kampmeier (INHS), Dave Vieglais (KUNHM), Renato De Giovanni (CRIA), Campbell Webb (TDWG RDF/OWL Task Group), Paul J. Morris (Harvard University Herbaria/Museum of Comparative Zoölogy), Mark Schildhauer (NCEAS)
+
+Creator
+: TDWG Darwin Core Maintenance Group
+
+Bibliographic citation
+: Darwin Core Maintenance Group. 2020. List of Darwin Core terms. Biodiversity Information Standards (TDWG). <http://rs.tdwg.org/dwc/doc/list/2020-08-12>
+
+
+## 1 Introduction (Informative)
+
+This document contains terms that are part of the most recent version of the Darwin Core vocabulary (<http://rs.tdwg.org/version/dwc/2020-08-12>).
+
+This document includes terms in four namespaces that contain recommended terms: `dwc:`, `dwciri:`, `dc:`, and `dcterms:`. However, some terms in these namespaces are deprecated and should no longer be used. Deprecation is noted in the term metadata. Namespaces that contain only deprecated terms are not included in this document, but metadata about those terms can be retrieved by dereferencing their IRIs.
+
+### 1.1 Status of the content of this document
+
+Sections 1 and 3 are non-normative.
+
+Section 2 is normative.
+
+In Section 4, the values of `Term IRI` and `Definition` are normative. The values of `Term Name` are non-normative, although one can expect that the namespace abbreviation prefix is one commonly used for the term namespace. `Label` and the values of all other properties (such as `Examples` and `Notes`) are non-normative.
+
+### 1.2 RFC 2119 key words
+The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC 2119](https://tools.ietf.org/html/rfc2119).
+
+### 1.3 Namespace abbreviations
+
+The following namespace abbreviations are used in this document:
+
+| abbreviation | IRI |
+| --- | --- |
+| dwc: | http://rs.tdwg.org/dwc/terms/ |
+| dwciri: | http://rs.tdwg.org/dwc/iri/ |
+| dc: | http://purl.org/dc/elements/1.1/ |
+| dcterms: | http://purl.org/dc/terms/ |
+
+## 2 Use of Terms
+
+Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](https://dwc.tdwg.org/rdf/#143-use-of-darwin-core-terms-in-rdf-normative), terms in the `dwciri:` namespace MUST be used with IRI values. Terms in the `dwc:` and `dc:` namespaces are generally expected to have string literal values. Values for terms in the `dcterms:` namespace will depend on the details of the term. See [Section 3 of the Darwin Core RDF Guide](https://dwc.tdwg.org/rdf/#3-term-reference-normative) for details.
+
+## 3 Term indices
--- a/docs/list/index.md
+++ b/docs/list/index.md