{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Script to build Markdown pages that provide term metadata for simple vocabularies\n", "# Steve Baskauf 2020-06-28 CC0\n", "# This script merges static Markdown header and footer documents with term information tables (in Markdown) generated from data in the rs.tdwg.org repo from the TDWG GitHub site\n", "\n", "# Note: this script calls a function from http_library.py, which requires importing the requests, csv, and json modules\n", "import re\n", "import requests # best library to manage HTTP transactions\n", "import csv # library to read/write/parse CSV files\n", "import json # library to convert JSON to Python data structures\n", "import pandas as pd\n", "\n", "# -----------------\n", "# Configuration section\n", "# -----------------\n", "\n", "# !!!! Note !!!!\n", "# This is an example of a simple vocabulary without categories. For a complex example\n", "# with multiple namespaces and several categories, see build-page-categories.ipynb\n", "\n", "# This is the base URL for raw files from the branch of the repo that has been pushed to GitHub. In this example,\n", "# the branch is named \"master\"\n", "githubBaseUri = 'https://raw.githubusercontent.com/tdwg/rs.tdwg.org/master/'\n", "\n", "headerFileName = 'termlist-header.md'\n", "footerFileName = 'termlist-footer.md'\n", "outFileName = '../../docs/doe/index.md'\n", "\n", "# This is a Python list of the database names of the term lists to be included in the document.\n", "termLists = ['degreeOfEstablishment']\n", "\n", "# NOTE! There may be problems unless every term list is of the same vocabulary type since the number of columns will differ\n", "# However, there probably aren't any circumstances where mixed types will be used to generate the same page.\n", "vocab_type = 2 # 1 is simple vocabulary, 2 is simple controlled vocabulary, 3 is c.v. 
with broader hierarchy\n",
    "\n",
    "# Terms in large vocabularies like Darwin and Audubon Cores may be organized into categories using tdwgutility_organizedInClass\n",
    "# If so, those categories can be used to group terms in the generated term list document.\n",
    "organized_in_categories = False\n",
    "\n",
    "# If organized in categories, the display_order list must contain the IRIs that are values of tdwgutility_organizedInClass\n",
    "# If not organized into categories, the value is irrelevant. There just needs to be one item in the list.\n",
    "display_order = ['']\n",
    "display_label = ['Vocabulary'] # these are the section labels for the categories in the page\n",
    "display_comments = [''] # these are the comments about the category to be appended following the section labels\n",
    "display_id = ['Vocabulary'] # these are the fragment identifiers for the associated sections for the categories\n",
    "\n",
    "# ---------------\n",
    "# Function definitions\n",
    "# ---------------\n",
    "\n",
    "# replace URL with link\n",
    "#\n",
    "def createLinks(text):\n",
    "    \"\"\"Wrap each http(s) URL found in text in an HTML anchor element.\n",
    "\n",
    "    A URL runs from its scheme up to the next whitespace, comma, semicolon,\n",
    "    closing parenthesis or double quote. A period that ends the match is\n",
    "    treated as sentence punctuation: it is left out of the link and written\n",
    "    after the closing tag. Text without URLs is returned unchanged.\n",
    "    \"\"\"\n",
    "    def repl(match):\n",
    "        url = match.group(1)\n",
    "        # A trailing period belongs to the sentence, not to the URL.\n",
    "        if url[-1] == '.':\n",
    "            return '<a href=\"' + url[:-1] + '\">' + url[:-1] + '</a>.'\n",
    "        return '<a href=\"' + url + '\">' + url + '</a>'\n",
    "\n",
    "    # Raw string so that \\s and \\) reach the regex engine without invalid-escape warnings.\n",
    "    pattern = r'(https?://[^\\s,;\\)\"]*)'\n",
    "    result = re.sub(pattern, repl, text)\n",
    "    return result\n",
    "\n",
    "# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. 
Van Hoey\n",
    "def convert_code(text_with_backticks):\n",
    "    \"\"\"Convert every back-quoted span in a text field to an HTML code element.\n",
    "\n",
    "    Each `...` span is replaced by <code>...</code>; the backticks themselves\n",
    "    are dropped and text outside back-quoted spans is returned unchanged.\n",
    "    \"\"\"\n",
    "    return re.sub(r'`([^`]*)`', r'<code>\\1</code>', text_with_backticks)\n",
    "\n",
    "def convert_link(text_with_urls):\n",
    " \"\"\"Takes all links in a text field and converts it to the html tagged\n",
    " version of the link\n",
    " \"\"\"\n",
    " def _handle_matched(inputstring):\n",
    " \"\"\"quick hack version of url handling on the current prime versions data\"\"\"\n",
    " url = inputstring.group()\n",
    " return \"{}\".format(url, url)\n",
    "\n",
    " regx = \"(http[s]?://[\\w\\d:#@%/;$()~_?\\+-;=\\\\\\.&]*)(?\\n'\n",
    " curie = row['pref_ns_prefix'] + \":\" + row['term_localName']\n",
    " curieAnchor = curie.replace(':','_')\n",
    " text += '\\t\\n'\n",
    " text += '\\t\\t\\n'\n",
    " text += '\\t\\t\\tTerm Name ' + curie + '\\n'\n",
    " text += '\\t\\t\\n'\n",
    " text += '\\t\\n'\n",
    " text += '\\t\\n'\n",
    " text += '\\t\\t\\n'\n",
    " text += '\\t\\t\\tTerm IRI\\n'\n",
    " uri = row['pref_ns_uri'] + row['term_localName']\n",
    " text += '\\t\\t\\t' + uri + '\\n'\n",
    " text += '\\t\\t\\n'\n",
    " text += '\\t\\t\\n'\n",
    " text += '\\t\\t\\tModified\\n'\n",
    " text += '\\t\\t\\t' + row['term_modified'] + '\\n'\n",
    " text += '\\t\\t\\n'\n",
    "\n",
    " if row['version_iri'] != '':\n",
    " text += '\\t\\t\\n'\n",
    " text += '\\t\\t\\tTerm version IRI\\n'\n",
    " text += '\\t\\t\\t' + row['version_iri'] + '\\n'\n",
    " text += '\\t\\t\\n'\n",
    "\n",
    " text += '\\t\\t\\n'\n",
    " text += '\\t\\t\\tLabel\\n'\n",
    " text += '\\t\\t\\t' + row['label'] + '\\n'\n",
    " text += '\\t\\t\\n'\n",
    "\n",
    " if row['term_deprecated'] != '':\n",
    " text += '\\t\\t\\n'\n",
    " text += '\\t\\t\\t\\n'\n",
    " text += '\\t\\t\\tThis term is deprecated and should no longer be used.\\n'\n",
    " text += '\\t\\t\\n'\n",
    "\n",
    " text += '\\t\\t\\n'\n",
    " text += '\\t\\t\\tDefinition\\n'\n",
    " text += '\\t\\t\\t' + row['definition'] + '\\n'\n",
    " text += 
'\\t\\t\\n'\n", "\n", " if row['usage'] != '':\n", " text += '\\t\\t\\n'\n", " text += '\\t\\t\\tUsage\\n'\n", " text += '\\t\\t\\t' + convert_link(convert_code(row['usage'])) + '\\n'\n", " text += '\\t\\t\\n'\n", "\n", " if row['notes'] != '':\n", " text += '\\t\\t\\n'\n", " text += '\\t\\t\\tNotes\\n'\n", " text += '\\t\\t\\t' + convert_link(convert_code(row['notes'])) + '\\n'\n", " text += '\\t\\t\\n'\n", "\n", " if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n", " text += '\\t\\t\\n'\n", " text += '\\t\\t\\tControlled value\\n'\n", " text += '\\t\\t\\t' + row['controlled_value_string'] + '\\n'\n", " text += '\\t\\t\\n'\n", "\n", " if vocab_type == 3 and row['skos_broader'] != '': # controlled vocabulary with skos:broader relationships\n", " text += '\\t\\t\\n'\n", " text += '\\t\\t\\tHas broader concept\\n'\n", " curieAnchor = row['skos_broader'].replace(':','_')\n", " text += '\\t\\t\\t' + row['skos_broader'] + '\\n'\n", " text += '\\t\\t\\n'\n", "\n", " text += '\\t\\t\\n'\n", " text += '\\t\\t\\tType\\n'\n", " if row['type'] == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property':\n", " text += '\\t\\t\\tProperty\\n'\n", " elif row['type'] == 'http://www.w3.org/2000/01/rdf-schema#Class':\n", " text += '\\t\\t\\tClass\\n'\n", " elif row['type'] == 'http://www.w3.org/2004/02/skos/core#Concept':\n", " text += '\\t\\t\\tConcept\\n'\n", " else:\n", " text += '\\t\\t\\t' + row['type'] + '\\n' # this should rarely happen\n", " text += '\\t\\t\\n'\n", "\n", " # Look up decisions related to this term\n", " for drow_index,drow in decisions_df.iterrows():\n", " if drow['linked_affected_resource'] == uri:\n", " text += '\\t\\t\\n'\n", " text += '\\t\\t\\tExecutive Committee decision\\n'\n", " text += '\\t\\t\\thttp://rs.tdwg.org/decisions/' + drow['decision_localName'] + '\\n'\n", " text += '\\t\\t\\n' \n", "\n", " text += '\\t\\n'\n", " text += '\\n'\n", " text += '\\n'\n", " text += '\\n'\n", "term_table = 
text\n",
    "\n",
    "print(term_table)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Modify to display the indices that you want"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = index_by_label + term_table\n",
    "#text = index_by_name + index_by_label + term_table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read in header and footer, merge with terms table, and output.\n",
    "# Each file is opened in a with-block so its handle is closed even if a read or write raises.\n",
    "\n",
    "with open(headerFileName, 'rt', encoding='utf-8') as headerObject:\n",
    "    header = headerObject.read()\n",
    "\n",
    "with open(footerFileName, 'rt', encoding='utf-8') as footerObject:\n",
    "    footer = footerObject.read()\n",
    "\n",
    "# The page is the static header, then the generated index/table text, then the static footer.\n",
    "output = header + text + footer\n",
    "with open(outFileName, 'wt', encoding='utf-8') as outputObject:\n",
    "    outputObject.write(output)\n",
    "\n",
    "print('done')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}