dwc/build/pw-cv-build/build-page-simple.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Script to build Markdown pages that provide term metadata for simple vocabularies\n",
    "# Steve Baskauf 2020-06-28 CC0\n",
    "# This script merges static Markdown header and footer documents with term information tables (in Markdown) generated from data in the rs.tdwg.org repo from the TDWG Github site\n",
    "\n",
    "# Note: this script calls a function from http_library.py, which requires importing the requests, csv, and json modules\n",
    "import re\n",
    "import requests   # best library to manage HTTP transactions\n",
    "import csv        # library to read/write/parse CSV files\n",
    "import json       # library to convert JSON to Python data structures\n",
    "import pandas as pd\n",
    "\n",
    "# -----------------\n",
    "# Configuration section\n",
    "# -----------------\n",
    "\n",
    "# !!!! Note !!!!\n",
    "# This is an example of a simple vocabulary without categories. For a complex example\n",
    "# with multiple namespaces and several categories, see build-page-categories.ipynb\n",
    "\n",
    "# This is the base URL for raw files from the branch of the repo that has been pushed to GitHub. In this example,\n",
    "# the branch is named \"pathway\"\n",
    "githubBaseUri = 'https://raw.githubusercontent.com/tdwg/rs.tdwg.org/master/'\n",
    "\n",
    "headerFileName = 'termlist-header.md'\n",
    "footerFileName = 'termlist-footer.md'\n",
    "outFileName = '../../docs/pw/index.md'\n",
    "\n",
    "# This is a Python list of the database names of the term lists to be included in the document.\n",
    "termLists = ['pathway']\n",
    "\n",
    "# NOTE! There may be problems unless every term list is of the same vocabulary type since the number of columns will differ\n",
    "# However, there probably aren't any circumstances where mixed types will be used to generate the same page.\n",
    "vocab_type = 3 # 1 is simple vocabulary, 2 is simple controlled vocabulary, 3 is c.v. with broader hierarchy\n",
    "\n",
    "# Terms in large vocabularies like Darwin and Audubon Cores may be organized into categories using tdwgutility_organizedInClass\n",
    "# If so, those categories can be used to group terms in the generated term list document.\n",
    "organized_in_categories = False\n",
    "\n",
    "# If organized in categories, the display_order list must contain the IRIs that are values of tdwgutility_organizedInClass\n",
    "# If not organized into categories, the value is irrelevant. There just needs to be one item in the list.\n",
    "display_order = ['']\n",
    "display_label = ['Vocabulary'] # these are the section labels for the categories in the page\n",
    "display_comments = [''] # these are the comments about the category to be appended following the section labels\n",
    "display_id = ['Vocabulary'] # these are the fragment identifiers for the associated sections for the categories\n",
    "\n",
    "# ---------------\n",
    "# Function definitions\n",
    "# ---------------\n",
    "\n",
    "# replace URL with link\n",
    "#\n",
    "def createLinks(text):\n",
    "    def repl(match):\n",
    "        if match.group(1)[-1] == '.':\n",
    "            return '<a href=\"' + match.group(1)[:-1] + '\">' + match.group(1)[:-1] + '</a>.'\n",
    "        return '<a href=\"' + match.group(1) + '\">' + match.group(1) + '</a>'\n",
    "\n",
    "    pattern = '(https?://[^\\s,;\\)\"]*)'\n",
    "    result = re.sub(pattern, repl, text)\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "term_lists_info = []\n",
    "\n",
    "frame = pd.read_csv(githubBaseUri + 'term-lists/term-lists.csv', na_filter=False)\n",
    "for termList in termLists:\n",
    "    term_list_dict = {'list_iri': termList}\n",
    "    term_list_dict = {'database': termList}\n",
    "    for index,row in frame.iterrows():\n",
    "        if row['database'] == termList:\n",
    "            term_list_dict['pref_ns_prefix'] = row['vann_preferredNamespacePrefix']\n",
    "            term_list_dict['pref_ns_uri'] = row['vann_preferredNamespaceUri']\n",
    "            term_list_dict['list_iri'] = row['list']\n",
    "    term_lists_info.append(term_list_dict)\n",
    "print(term_lists_info)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create column list\n",
    "column_list = ['pref_ns_prefix', 'pref_ns_uri', 'term_localName', 'label', 'definition', 'usage', 'notes', 'term_modified', 'term_deprecated', 'type']\n",
    "if vocab_type == 2:\n",
    "    column_list += ['controlled_value_string']\n",
    "elif vocab_type == 3:\n",
    "    column_list += ['controlled_value_string', 'skos_broader']\n",
    "if organized_in_categories:\n",
    "    column_list.append('tdwgutility_organizedInClass')\n",
    "column_list.append('version_iri')\n",
    "\n",
    "# Create list of lists metadata table\n",
    "table_list = []\n",
    "for term_list in term_lists_info:\n",
    "    # retrieve versions metadata for term list\n",
    "    versions_url = githubBaseUri + term_list['database'] + '-versions/' + term_list['database'] + '-versions.csv'\n",
    "    versions_df = pd.read_csv(versions_url, na_filter=False)\n",
    "    \n",
    "    # retrieve current term metadata for term list\n",
    "    data_url = githubBaseUri + term_list['database'] + '/' + term_list['database'] + '.csv'\n",
    "    frame = pd.read_csv(data_url, na_filter=False)\n",
    "    for index,row in frame.iterrows():\n",
    "        row_list = [term_list['pref_ns_prefix'], term_list['pref_ns_uri'], row['term_localName'], row['label'], row['definition'], row['usage'], row['notes'], row['term_modified'], row['term_deprecated'], row['type']]\n",
    "        if vocab_type == 2:\n",
    "            row_list += [row['controlled_value_string']]\n",
    "        elif vocab_type == 3:\n",
    "            if row['skos_broader'] =='':\n",
    "                row_list += [row['controlled_value_string'], '']\n",
    "            else:\n",
    "                row_list += [row['controlled_value_string'], term_list['pref_ns_prefix'] + ':' + row['skos_broader']]\n",
    "        if organized_in_categories:\n",
    "            row_list.append(row['tdwgutility_organizedInClass'])\n",
    "\n",
    "        # Borrowed terms really don't have implemented versions. They may be lacking values for version_status.\n",
    "        # In their case, their version IRI will be omitted.\n",
    "        found = False\n",
    "        for vindex, vrow in versions_df.iterrows():\n",
    "            if vrow['term_localName']==row['term_localName'] and vrow['version_status']=='recommended':\n",
    "                found = True\n",
    "                version_iri = vrow['version']\n",
    "                # NOTE: the current hack for non-TDWG terms without a version is to append # to the end of the term IRI\n",
    "                if version_iri[len(version_iri)-1] == '#':\n",
    "                    version_iri = ''\n",
    "        if not found:\n",
    "            version_iri = ''\n",
    "        row_list.append(version_iri)\n",
    "\n",
    "        table_list.append(row_list)\n",
    "\n",
    "# Turn list of lists into dataframe\n",
    "terms_df = pd.DataFrame(table_list, columns = column_list)\n",
    "\n",
    "terms_sorted_by_label = terms_df.sort_values(by='label')\n",
    "terms_sorted_by_localname = terms_df.sort_values(by='term_localName')\n",
    "terms_sorted_by_label"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the following cell to generate an index sorted alphabetically by lowercase term local name. Omit this index if the terms have opaque local names."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# generate the index of terms grouped by category and sorted alphabetically by lowercase term local name\n",
    "\n",
    "text = '### 3.1 Index By Term Name\\n\\n'\n",
    "text += '(See also [3.2 Index By Label](#32-index-by-label))\\n\\n'\n",
    "for category in range(0,len(display_order)):\n",
    "    text += '**' + display_label[category] + '**\\n'\n",
    "    text += '\\n'\n",
    "    if organized_in_categories:\n",
    "        filtered_table = terms_sorted_by_localname[terms_sorted_by_localname['tdwgutility_organizedInClass']==display_order[category]]\n",
    "        filtered_table.reset_index(drop=True, inplace=True)\n",
    "    else:\n",
    "        filtered_table = terms_sorted_by_localname\n",
    "        filtered_table.reset_index(drop=True, inplace=True)\n",
    "        \n",
    "    for row_index,row in filtered_table.iterrows():\n",
    "        curie = row['pref_ns_prefix'] + \":\" + row['term_localName']\n",
    "        curie_anchor = curie.replace(':','_')\n",
    "        text += '[' + curie + '](#' + curie_anchor + ')'\n",
    "        if row_index < len(filtered_table) - 1:\n",
    "            text += ' |'\n",
    "        text += '\\n'\n",
    "    text += '\\n'\n",
    "index_by_name = text\n",
    "\n",
    "print(index_by_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the following cell to generate an index by term label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = '\\n\\n'\n",
    "\n",
    "# Comment out the following two lines if there is no index by local names\n",
    "#text = '### 3.2 Index By Label\\n\\n'\n",
    "#text += '(See also [3.1 Index By Term Name](#31-index-by-term-name))\\n\\n'\n",
    "for category in range(0,len(display_order)):\n",
    "    if organized_in_categories:\n",
    "        text += '**' + display_label[category] + '**\\n'\n",
    "        text += '\\n'\n",
    "        filtered_table = terms_sorted_by_label[terms_sorted_by_label['tdwgutility_organizedInClass']==display_order[category]]\n",
    "        filtered_table.reset_index(drop=True, inplace=True)\n",
    "    else:\n",
    "        filtered_table = terms_sorted_by_label\n",
    "        filtered_table.reset_index(drop=True, inplace=True)\n",
    "        \n",
    "    for row_index,row in filtered_table.iterrows():\n",
    "        if row_index == 0 or (row_index != 0 and row['label'] != filtered_table.iloc[row_index - 1].loc['label']): # this is a hack to prevent duplicate labels\n",
    "            curie_anchor = row['pref_ns_prefix'] + \"_\" + row['term_localName']\n",
    "            text += '[' + row['label'] + '](#' + curie_anchor + ')'\n",
    "            if row_index < len(filtered_table) - 2 or (row_index == len(filtered_table) - 2 and row['label'] != filtered_table.iloc[row_index + 1].loc['label']):\n",
    "                text += ' |'\n",
    "            text += '\\n'\n",
    "    text += '\\n'\n",
    "index_by_label = text\n",
    "\n",
    "print(index_by_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "decisions_df = pd.read_csv('https://raw.githubusercontent.com/tdwg/rs.tdwg.org/master/decisions/decisions-links.csv', na_filter=False)\n",
    "\n",
    "# generate a table for each term, with terms grouped by category\n",
    "\n",
    "# generate the Markdown for the terms table\n",
    "text = '## 4 Vocabulary\\n'\n",
    "for category in range(0,len(display_order)):\n",
    "    if organized_in_categories:\n",
    "        text += '### 4.' + str(category + 1) + ' ' + display_label[category] + '\\n'\n",
    "        text += '\\n'\n",
    "        text += display_comments[category] # insert the comments for the category, if any.\n",
    "        filtered_table = terms_sorted_by_localname[terms_sorted_by_localname['tdwgutility_organizedInClass']==display_order[category]]\n",
    "        filtered_table.reset_index(drop=True, inplace=True)\n",
    "    else:\n",
    "        filtered_table = terms_sorted_by_localname\n",
    "        filtered_table.reset_index(drop=True, inplace=True)\n",
    "\n",
    "    for row_index,row in filtered_table.iterrows():\n",
    "        text += '<table>\\n'\n",
    "        curie = row['pref_ns_prefix'] + \":\" + row['term_localName']\n",
    "        curieAnchor = curie.replace(':','_')\n",
    "        text += '\\t<thead>\\n'\n",
    "        text += '\\t\\t<tr>\\n'\n",
    "        text += '\\t\\t\\t<th colspan=\"2\"><a id=\"' + curieAnchor + '\"></a>Term Name  ' + curie + '</th>\\n'\n",
    "        text += '\\t\\t</tr>\\n'\n",
    "        text += '\\t</thead>\\n'\n",
    "        text += '\\t<tbody>\\n'\n",
    "        text += '\\t\\t<tr>\\n'\n",
    "        text += '\\t\\t\\t<td>Term IRI</td>\\n'\n",
    "        uri = row['pref_ns_uri'] + row['term_localName']\n",
    "        text += '\\t\\t\\t<td><a href=\"' + uri + '\">' + uri + '</a></td>\\n'\n",
    "        text += '\\t\\t</tr>\\n'\n",
    "        text += '\\t\\t<tr>\\n'\n",
    "        text += '\\t\\t\\t<td>Modified</td>\\n'\n",
    "        text += '\\t\\t\\t<td>' + row['term_modified'] + '</td>\\n'\n",
    "        text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        if row['version_iri'] != '':\n",
    "            text += '\\t\\t<tr>\\n'\n",
    "            text += '\\t\\t\\t<td>Term version IRI</td>\\n'\n",
    "            text += '\\t\\t\\t<td><a href=\"' + row['version_iri'] + '\">' + row['version_iri'] + '</a></td>\\n'\n",
    "            text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        text += '\\t\\t<tr>\\n'\n",
    "        text += '\\t\\t\\t<td>Label</td>\\n'\n",
    "        text += '\\t\\t\\t<td>' + row['label'] + '</td>\\n'\n",
    "        text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        if row['term_deprecated'] != '':\n",
    "            text += '\\t\\t<tr>\\n'\n",
    "            text += '\\t\\t\\t<td></td>\\n'\n",
    "            text += '\\t\\t\\t<td><strong>This term is deprecated and should no longer be used.</strong></td>\\n'\n",
    "            text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        text += '\\t\\t<tr>\\n'\n",
    "        text += '\\t\\t\\t<td>Definition</td>\\n'\n",
    "        text += '\\t\\t\\t<td>' + row['definition'] + '</td>\\n'\n",
    "        text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        if row['usage'] != '':\n",
    "            text += '\\t\\t<tr>\\n'\n",
    "            text += '\\t\\t\\t<td>Usage</td>\\n'\n",
    "            text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n",
    "            text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        if row['notes'] != '':\n",
    "            text += '\\t\\t<tr>\\n'\n",
    "            text += '\\t\\t\\t<td>Notes</td>\\n'\n",
    "            text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n",
    "            text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
    "            text += '\\t\\t<tr>\\n'\n",
    "            text += '\\t\\t\\t<td>Controlled value</td>\\n'\n",
    "            text += '\\t\\t\\t<td>' + row['controlled_value_string'] + '</td>\\n'\n",
    "            text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        if vocab_type == 3 and row['skos_broader'] != '': # controlled vocabulary with skos:broader relationships\n",
    "            text += '\\t\\t<tr>\\n'\n",
    "            text += '\\t\\t\\t<td>Has broader concept</td>\\n'\n",
    "            curieAnchor = row['skos_broader'].replace(':','_')\n",
    "            text += '\\t\\t\\t<td><a href=\"#' + curieAnchor + '\">' + row['skos_broader'] + '</a></td>\\n'\n",
    "            text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        text += '\\t\\t<tr>\\n'\n",
    "        text += '\\t\\t\\t<td>Type</td>\\n'\n",
    "        if row['type'] == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property':\n",
    "            text += '\\t\\t\\t<td>Property</td>\\n'\n",
    "        elif row['type'] == 'http://www.w3.org/2000/01/rdf-schema#Class':\n",
    "            text += '\\t\\t\\t<td>Class</td>\\n'\n",
    "        elif row['type'] == 'http://www.w3.org/2004/02/skos/core#Concept':\n",
    "            text += '\\t\\t\\t<td>Concept</td>\\n'\n",
    "        else:\n",
    "            text += '\\t\\t\\t<td>' + row['type'] + '</td>\\n' # this should rarely happen\n",
    "        text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        # Look up decisions related to this term\n",
    "        for drow_index,drow in decisions_df.iterrows():\n",
    "            if drow['linked_affected_resource'] == uri:\n",
    "                text += '\\t\\t<tr>\\n'\n",
    "                text += '\\t\\t\\t<td>Executive Committee decision</td>\\n'\n",
    "                text += '\\t\\t\\t<td><a href=\"http://rs.tdwg.org/decisions/' + drow['decision_localName'] + '\">http://rs.tdwg.org/decisions/' + drow['decision_localName'] + '</a></td>\\n'\n",
    "                text += '\\t\\t</tr>\\n'                        \n",
    "\n",
    "        text += '\\t</tbody>\\n'\n",
    "        text += '</table>\\n'\n",
    "        text += '\\n'\n",
    "    text += '\\n'\n",
    "term_table = text\n",
    "\n",
    "print(term_table)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Modify to display the indices that you want"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = index_by_label + term_table\n",
    "#text = index_by_name + index_by_label + term_table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# read in header and footer, merge with terms table, and output\n",
    "\n",
    "headerObject = open(headerFileName, 'rt', encoding='utf-8')\n",
    "header = headerObject.read()\n",
    "headerObject.close()\n",
    "\n",
    "footerObject = open(footerFileName, 'rt', encoding='utf-8')\n",
    "footer = footerObject.read()\n",
    "footerObject.close()\n",
    "\n",
    "output = header + text + footer\n",
    "outputObject = open(outFileName, 'wt', encoding='utf-8')\n",
    "outputObject.write(output)\n",
    "outputObject.close()\n",
    "    \n",
    "print('done')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
create build scripts for three new establishmentMeans-related controlled vocabs 2020-10-19 03:42:32 +00:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# Script to build Markdown pages that provide term metadata for simple vocabularies\n",`
			`"# Steve Baskauf 2020-06-28 CC0\n",`
			`"# This script merges static Markdown header and footer documents with term information tables (in Markdown) generated from data in the rs.tdwg.org repo from the TDWG Github site\n",`
			`"\n",`
			`"# Note: this script calls a function from http_library.py, which requires importing the requests, csv, and json modules\n",`
			`"import re\n",`
			`"import requests # best library to manage HTTP transactions\n",`
			`"import csv # library to read/write/parse CSV files\n",`
			`"import json # library to convert JSON to Python data structures\n",`
			`"import pandas as pd\n",`
			`"\n",`
			`"# -----------------\n",`
			`"# Configuration section\n",`
			`"# -----------------\n",`
			`"\n",`
			`"# !!!! Note !!!!\n",`
			`"# This is an example of a simple vocabulary without categories. For a complex example\n",`
			`"# with multiple namespaces and several categories, see build-page-categories.ipynb\n",`
			`"\n",`
			`"# This is the base URL for raw files from the branch of the repo that has been pushed to GitHub. In this example,\n",`
			`"# the branch is named \"pathway\"\n",`
			`"githubBaseUri = 'https://raw.githubusercontent.com/tdwg/rs.tdwg.org/master/'\n",`
			`"\n",`
			`"headerFileName = 'termlist-header.md'\n",`
			`"footerFileName = 'termlist-footer.md'\n",`
			`"outFileName = '../../docs/pw/index.md'\n",`
			`"\n",`
			`"# This is a Python list of the database names of the term lists to be included in the document.\n",`
			`"termLists = ['pathway']\n",`
			`"\n",`
			`"# NOTE! There may be problems unless every term list is of the same vocabulary type since the number of columns will differ\n",`
			`"# However, there probably aren't any circumstances where mixed types will be used to generate the same page.\n",`
			`"vocab_type = 3 # 1 is simple vocabulary, 2 is simple controlled vocabulary, 3 is c.v. with broader hierarchy\n",`
			`"\n",`
			`"# Terms in large vocabularies like Darwin and Audubon Cores may be organized into categories using tdwgutility_organizedInClass\n",`
			`"# If so, those categories can be used to group terms in the generated term list document.\n",`
			`"organized_in_categories = False\n",`
			`"\n",`
			`"# If organized in categories, the display_order list must contain the IRIs that are values of tdwgutility_organizedInClass\n",`
			`"# If not organized into categories, the value is irrelevant. There just needs to be one item in the list.\n",`
			`"display_order = ['']\n",`
			`"display_label = ['Vocabulary'] # these are the section labels for the categories in the page\n",`
			`"display_comments = [''] # these are the comments about the category to be appended following the section labels\n",`
			`"display_id = ['Vocabulary'] # these are the fragment identifiers for the associated sections for the categories\n",`
			`"\n",`
			`"# ---------------\n",`
			`"# Function definitions\n",`
			`"# ---------------\n",`
			`"\n",`
			`"# replace URL with link\n",`
			`"#\n",`
			`"def createLinks(text):\n",`
			`" def repl(match):\n",`
			`" if match.group(1)[-1] == '.':\n",`
			`" return '<a href=\"' + match.group(1)[:-1] + '\">' + match.group(1)[:-1] + '</a>.'\n",`
			`" return '<a href=\"' + match.group(1) + '\">' + match.group(1) + '</a>'\n",`
			`"\n",`
			`" pattern = '(https?://[^\\s,;\\)\"]*)'\n",`
			`" result = re.sub(pattern, repl, text)\n",`
			`" return result"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"term_lists_info = []\n",`
			`"\n",`
			`"frame = pd.read_csv(githubBaseUri + 'term-lists/term-lists.csv', na_filter=False)\n",`
			`"for termList in termLists:\n",`
			`" term_list_dict = {'list_iri': termList}\n",`
			`" term_list_dict = {'database': termList}\n",`
			`" for index,row in frame.iterrows():\n",`
			`" if row['database'] == termList:\n",`
			`" term_list_dict['pref_ns_prefix'] = row['vann_preferredNamespacePrefix']\n",`
			`" term_list_dict['pref_ns_uri'] = row['vann_preferredNamespaceUri']\n",`
			`" term_list_dict['list_iri'] = row['list']\n",`
			`" term_lists_info.append(term_list_dict)\n",`
			`"print(term_lists_info)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# Create column list\n",`
			`"column_list = ['pref_ns_prefix', 'pref_ns_uri', 'term_localName', 'label', 'definition', 'usage', 'notes', 'term_modified', 'term_deprecated', 'type']\n",`
			`"if vocab_type == 2:\n",`
			`" column_list += ['controlled_value_string']\n",`
			`"elif vocab_type == 3:\n",`
			`" column_list += ['controlled_value_string', 'skos_broader']\n",`
			`"if organized_in_categories:\n",`
			`" column_list.append('tdwgutility_organizedInClass')\n",`
			`"column_list.append('version_iri')\n",`
			`"\n",`
			`"# Create list of lists metadata table\n",`
			`"table_list = []\n",`
			`"for term_list in term_lists_info:\n",`
			`" # retrieve versions metadata for term list\n",`
			`" versions_url = githubBaseUri + term_list['database'] + '-versions/' + term_list['database'] + '-versions.csv'\n",`
			`" versions_df = pd.read_csv(versions_url, na_filter=False)\n",`
			`" \n",`
			`" # retrieve current term metadata for term list\n",`
			`" data_url = githubBaseUri + term_list['database'] + '/' + term_list['database'] + '.csv'\n",`
			`" frame = pd.read_csv(data_url, na_filter=False)\n",`
			`" for index,row in frame.iterrows():\n",`
			`" row_list = [term_list['pref_ns_prefix'], term_list['pref_ns_uri'], row['term_localName'], row['label'], row['definition'], row['usage'], row['notes'], row['term_modified'], row['term_deprecated'], row['type']]\n",`
			`" if vocab_type == 2:\n",`
			`" row_list += [row['controlled_value_string']]\n",`
			`" elif vocab_type == 3:\n",`
			`" if row['skos_broader'] =='':\n",`
			`" row_list += [row['controlled_value_string'], '']\n",`
			`" else:\n",`
			`" row_list += [row['controlled_value_string'], term_list['pref_ns_prefix'] + ':' + row['skos_broader']]\n",`
			`" if organized_in_categories:\n",`
			`" row_list.append(row['tdwgutility_organizedInClass'])\n",`
			`"\n",`
			`" # Borrowed terms really don't have implemented versions. They may be lacking values for version_status.\n",`
			`" # In their case, their version IRI will be omitted.\n",`
			`" found = False\n",`
			`" for vindex, vrow in versions_df.iterrows():\n",`
			`" if vrow['term_localName']==row['term_localName'] and vrow['version_status']=='recommended':\n",`
			`" found = True\n",`
			`" version_iri = vrow['version']\n",`
			`" # NOTE: the current hack for non-TDWG terms without a version is to append # to the end of the term IRI\n",`
			`" if version_iri[len(version_iri)-1] == '#':\n",`
			`" version_iri = ''\n",`
			`" if not found:\n",`
			`" version_iri = ''\n",`
			`" row_list.append(version_iri)\n",`
			`"\n",`
			`" table_list.append(row_list)\n",`
			`"\n",`
			`"# Turn list of lists into dataframe\n",`
			`"terms_df = pd.DataFrame(table_list, columns = column_list)\n",`
			`"\n",`
			`"terms_sorted_by_label = terms_df.sort_values(by='label')\n",`
			`"terms_sorted_by_localname = terms_df.sort_values(by='term_localName')\n",`
			`"terms_sorted_by_label"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"Run the following cell to generate an index sorted alphabetically by lowercase term local name. Omit this index if the terms have opaque local names."`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# generate the index of terms grouped by category and sorted alphabetically by lowercase term local name\n",`
			`"\n",`
			`"text = '### 3.1 Index By Term Name\\n\\n'\n",`
			`"text += '(See also [3.2 Index By Label](#32-index-by-label))\\n\\n'\n",`
			`"for category in range(0,len(display_order)):\n",`
			`" text += '' + display_label[category] + '\\n'\n",`
			`" text += '\\n'\n",`
			`" if organized_in_categories:\n",`
			`" filtered_table = terms_sorted_by_localname[terms_sorted_by_localname['tdwgutility_organizedInClass']==display_order[category]]\n",`
			`" filtered_table.reset_index(drop=True, inplace=True)\n",`
			`" else:\n",`
			`" filtered_table = terms_sorted_by_localname\n",`
			`" filtered_table.reset_index(drop=True, inplace=True)\n",`
			`" \n",`
			`" for row_index,row in filtered_table.iterrows():\n",`
			`" curie = row['pref_ns_prefix'] + \":\" + row['term_localName']\n",`
			`" curie_anchor = curie.replace(':','_')\n",`
			`" text += '[' + curie + '](#' + curie_anchor + ')'\n",`
			`" if row_index < len(filtered_table) - 1:\n",`
			`" text += ' \|'\n",`
			`" text += '\\n'\n",`
			`" text += '\\n'\n",`
			`"index_by_name = text\n",`
			`"\n",`
			`"print(index_by_name)"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"Run the following cell to generate an index by term label"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"text = '\\n\\n'\n",`
			`"\n",`
			`"# Comment out the following two lines if there is no index by local names\n",`
			`"#text = '### 3.2 Index By Label\\n\\n'\n",`
			`"#text += '(See also [3.1 Index By Term Name](#31-index-by-term-name))\\n\\n'\n",`
			`"for category in range(0,len(display_order)):\n",`
			`" if organized_in_categories:\n",`
			`" text += '' + display_label[category] + '\\n'\n",`
			`" text += '\\n'\n",`
			`" filtered_table = terms_sorted_by_label[terms_sorted_by_label['tdwgutility_organizedInClass']==display_order[category]]\n",`
			`" filtered_table.reset_index(drop=True, inplace=True)\n",`
			`" else:\n",`
			`" filtered_table = terms_sorted_by_label\n",`
			`" filtered_table.reset_index(drop=True, inplace=True)\n",`
			`" \n",`
			`" for row_index,row in filtered_table.iterrows():\n",`
			`" if row_index == 0 or (row_index != 0 and row['label'] != filtered_table.iloc[row_index - 1].loc['label']): # this is a hack to prevent duplicate labels\n",`
			`" curie_anchor = row['pref_ns_prefix'] + \"_\" + row['term_localName']\n",`
			`" text += '[' + row['label'] + '](#' + curie_anchor + ')'\n",`
			`" if row_index < len(filtered_table) - 2 or (row_index == len(filtered_table) - 2 and row['label'] != filtered_table.iloc[row_index + 1].loc['label']):\n",`
			`" text += ' \|'\n",`
			`" text += '\\n'\n",`
			`" text += '\\n'\n",`
			`"index_by_label = text\n",`
			`"\n",`
			`"print(index_by_label)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"decisions_df = pd.read_csv('https://raw.githubusercontent.com/tdwg/rs.tdwg.org/master/decisions/decisions-links.csv', na_filter=False)\n",`
			`"\n",`
			`"# generate a table for each term, with terms grouped by category\n",`
			`"\n",`
			`"# generate the Markdown for the terms table\n",`
			`"text = '## 4 Vocabulary\\n'\n",`
			`"for category in range(0,len(display_order)):\n",`
			`" if organized_in_categories:\n",`
			`" text += '### 4.' + str(category + 1) + ' ' + display_label[category] + '\\n'\n",`
			`" text += '\\n'\n",`
			`" text += display_comments[category] # insert the comments for the category, if any.\n",`
			`" filtered_table = terms_sorted_by_localname[terms_sorted_by_localname['tdwgutility_organizedInClass']==display_order[category]]\n",`
			`" filtered_table.reset_index(drop=True, inplace=True)\n",`
			`" else:\n",`
			`" filtered_table = terms_sorted_by_localname\n",`
			`" filtered_table.reset_index(drop=True, inplace=True)\n",`
			`"\n",`
			`" for row_index,row in filtered_table.iterrows():\n",`
			`" text += '<table>\\n'\n",`
			`" curie = row['pref_ns_prefix'] + \":\" + row['term_localName']\n",`
			`" curieAnchor = curie.replace(':','_')\n",`
			`" text += '\\t<thead>\\n'\n",`
			`" text += '\\t\\t<tr>\\n'\n",`
			`" text += '\\t\\t\\t<th colspan=\"2\"><a id=\"' + curieAnchor + '\"></a>Term Name ' + curie + '</th>\\n'\n",`
			`" text += '\\t\\t</tr>\\n'\n",`
			`" text += '\\t</thead>\\n'\n",`
			`" text += '\\t<tbody>\\n'\n",`
			`" text += '\\t\\t<tr>\\n'\n",`
			`" text += '\\t\\t\\t<td>Term IRI</td>\\n'\n",`
			`" uri = row['pref_ns_uri'] + row['term_localName']\n",`
			`" text += '\\t\\t\\t<td><a href=\"' + uri + '\">' + uri + '</a></td>\\n'\n",`
			`" text += '\\t\\t</tr>\\n'\n",`
Add missing open <tr> tag in CV build scripts 2020-10-29 12:38:09 +00:00			`" text += '\\t\\t<tr>\\n'\n",`
create build scripts for three new establishmentMeans-related controlled vocabs 2020-10-19 03:42:32 +00:00			`" text += '\\t\\t\\t<td>Modified</td>\\n'\n",`
			`" text += '\\t\\t\\t<td>' + row['term_modified'] + '</td>\\n'\n",`
			`" text += '\\t\\t</tr>\\n'\n",`
			`"\n",`
			`" if row['version_iri'] != '':\n",`
			`" text += '\\t\\t<tr>\\n'\n",`
			`" text += '\\t\\t\\t<td>Term version IRI</td>\\n'\n",`
			`" text += '\\t\\t\\t<td><a href=\"' + row['version_iri'] + '\">' + row['version_iri'] + '</a></td>\\n'\n",`
			`" text += '\\t\\t</tr>\\n'\n",`
			`"\n",`
			`" text += '\\t\\t<tr>\\n'\n",`
			`" text += '\\t\\t\\t<td>Label</td>\\n'\n",`
			`" text += '\\t\\t\\t<td>' + row['label'] + '</td>\\n'\n",`
			`" text += '\\t\\t</tr>\\n'\n",`
			`"\n",`
			`" if row['term_deprecated'] != '':\n",`
			`" text += '\\t\\t<tr>\\n'\n",`
			`" text += '\\t\\t\\t<td></td>\\n'\n",`
			`" text += '\\t\\t\\t<td><strong>This term is deprecated and should no longer be used.</strong></td>\\n'\n",`
			`" text += '\\t\\t</tr>\\n'\n",`
			`"\n",`
			`" text += '\\t\\t<tr>\\n'\n",`
			`" text += '\\t\\t\\t<td>Definition</td>\\n'\n",`
			`" text += '\\t\\t\\t<td>' + row['definition'] + '</td>\\n'\n",`
			`" text += '\\t\\t</tr>\\n'\n",`
			`"\n",`
			`" if row['usage'] != '':\n",`
			`" text += '\\t\\t<tr>\\n'\n",`
			`" text += '\\t\\t\\t<td>Usage</td>\\n'\n",`
			`" text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n",`
			`" text += '\\t\\t</tr>\\n'\n",`
			`"\n",`
			`" if row['notes'] != '':\n",`
			`" text += '\\t\\t<tr>\\n'\n",`
			`" text += '\\t\\t\\t<td>Notes</td>\\n'\n",`
			`" text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n",`
			`" text += '\\t\\t</tr>\\n'\n",`
			`"\n",`
			`" if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",`
			`" text += '\\t\\t<tr>\\n'\n",`
			`" text += '\\t\\t\\t<td>Controlled value</td>\\n'\n",`
			`" text += '\\t\\t\\t<td>' + row['controlled_value_string'] + '</td>\\n'\n",`
			`" text += '\\t\\t</tr>\\n'\n",`
			`"\n",`
			`" if vocab_type == 3 and row['skos_broader'] != '': # controlled vocabulary with skos:broader relationships\n",`
			`" text += '\\t\\t<tr>\\n'\n",`
			`" text += '\\t\\t\\t<td>Has broader concept</td>\\n'\n",`
			`" curieAnchor = row['skos_broader'].replace(':','_')\n",`
			`" text += '\\t\\t\\t<td><a href=\"#' + curieAnchor + '\">' + row['skos_broader'] + '</a></td>\\n'\n",`
			`" text += '\\t\\t</tr>\\n'\n",`
			`"\n",`
			`" text += '\\t\\t<tr>\\n'\n",`
			`" text += '\\t\\t\\t<td>Type</td>\\n'\n",`
			`" if row['type'] == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property':\n",`
			`" text += '\\t\\t\\t<td>Property</td>\\n'\n",`
			`" elif row['type'] == 'http://www.w3.org/2000/01/rdf-schema#Class':\n",`
			`" text += '\\t\\t\\t<td>Class</td>\\n'\n",`
			`" elif row['type'] == 'http://www.w3.org/2004/02/skos/core#Concept':\n",`
			`" text += '\\t\\t\\t<td>Concept</td>\\n'\n",`
			`" else:\n",`
			`" text += '\\t\\t\\t<td>' + row['type'] + '</td>\\n' # this should rarely happen\n",`
			`" text += '\\t\\t</tr>\\n'\n",`
			`"\n",`
			`" # Look up decisions related to this term\n",`
			`" for drow_index,drow in decisions_df.iterrows():\n",`
			`" if drow['linked_affected_resource'] == uri:\n",`
			`" text += '\\t\\t<tr>\\n'\n",`
			`" text += '\\t\\t\\t<td>Executive Committee decision</td>\\n'\n",`
			`" text += '\\t\\t\\t<td><a href=\"http://rs.tdwg.org/decisions/' + drow['decision_localName'] + '\">http://rs.tdwg.org/decisions/' + drow['decision_localName'] + '</a></td>\\n'\n",`
			`" text += '\\t\\t</tr>\\n' \n",`
			`"\n",`
			`" text += '\\t</tbody>\\n'\n",`
			`" text += '</table>\\n'\n",`
			`" text += '\\n'\n",`
			`" text += '\\n'\n",`
			`"term_table = text\n",`
			`"\n",`
			`"print(term_table)"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"Modify the next cell to choose which indices (index_by_name, index_by_label, or both) are placed ahead of the term tables on the page."`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# Page body: the selected index text followed by the term tables held in term_table.\n",`
			`"text = index_by_label + term_table\n",`
			`"# Alternative that places index_by_name ahead of index_by_label:\n",`
			`"#text = index_by_name + index_by_label + term_table"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# read in header and footer, merge with terms table, and output\n",`
			`"\n",`
			`"headerObject = open(headerFileName, 'rt', encoding='utf-8')\n",`
			`"header = headerObject.read()\n",`
			`"headerObject.close()\n",`
			`"\n",`
			`"footerObject = open(footerFileName, 'rt', encoding='utf-8')\n",`
			`"footer = footerObject.read()\n",`
			`"footerObject.close()\n",`
			`"\n",`
			`"output = header + text + footer\n",`
			`"outputObject = open(outFileName, 'wt', encoding='utf-8')\n",`
			`"outputObject.write(output)\n",`
			`"outputObject.close()\n",`
			`" \n",`
			`"print('done')"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": []`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
			`"version": "3.7.1"`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 2`
			`}`