Merge branch 'master' into complete_issue_265

This commit is contained in:
Peter Desmet 2020-08-20 10:58:54 +02:00 committed by GitHub
commit 5024b29467
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 393 additions and 129 deletions

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -78,19 +78,9 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Retrieving term list metadata from GitHub\n",
"[{'database': 'terms', 'pref_ns_prefix': 'dwc', 'pref_ns_uri': 'http://rs.tdwg.org/dwc/terms/', 'list_iri': 'http://rs.tdwg.org/dwc/terms/'}, {'database': 'iri', 'pref_ns_prefix': 'dwciri', 'pref_ns_uri': 'http://rs.tdwg.org/dwc/iri/', 'list_iri': 'http://rs.tdwg.org/dwc/iri/'}, {'database': 'dc-for-dwc', 'pref_ns_prefix': 'dc', 'pref_ns_uri': 'http://purl.org/dc/elements/1.1/', 'list_iri': 'http://rs.tdwg.org/dwc/dc/'}, {'database': 'dcterms-for-dwc', 'pref_ns_prefix': 'dcterms', 'pref_ns_uri': 'http://purl.org/dc/terms/', 'list_iri': 'http://rs.tdwg.org/dwc/dcterms/'}]\n",
"\n"
]
}
],
"outputs": [],
"source": [
"# ---------------\n",
"# Retrieve term list metadata from GitHub\n",
@ -115,19 +105,9 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Retrieving metadata about terms from all namespaces from GitHub\n",
"done retrieving\n",
"\n"
]
}
],
"outputs": [],
"source": [
"# ---------------\n",
"# Create metadata table and populate using data from namespace databases in GitHub\n",
@ -206,18 +186,9 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generating term index by CURIE\n",
"\n"
]
}
],
"outputs": [],
"source": [
"# ---------------\n",
"# generate the index of terms grouped by category and sorted alphabetically by lowercase term local name\n",
@ -233,11 +204,9 @@
" if row['rdf_type'] == 'http://www.w3.org/2000/01/rdf-schema#Class':\n",
" curie = row['pref_ns_prefix'] + \":\" + row['term_localName']\n",
" curie_anchor = curie.replace(':','_')\n",
" text += '[' + curie + '](#' + curie_anchor + ')'\n",
" if row_index < len(terms_sorted_by_localname) - 1:\n",
" text += ' |'\n",
" text += '\\n'\n",
"text += '\\n'\n",
" text += '[' + curie + '](#' + curie_anchor + ') |\\n'\n",
"text = text[:len(text)-2] # remove final trailing vertical bar and newline\n",
"text += '\\n\\n' # put back removed newline\n",
"\n",
"for category in range(0,len(display_order)):\n",
" text += '**' + display_label[category] + '**\\n'\n",
@ -252,11 +221,10 @@
" if row['rdf_type'] != 'http://www.w3.org/2000/01/rdf-schema#Class':\n",
" curie = row['pref_ns_prefix'] + \":\" + row['term_localName']\n",
" curie_anchor = curie.replace(':','_')\n",
" text += '[' + curie + '](#' + curie_anchor + ')'\n",
" if row_index < len(filtered_table) - 1:\n",
" text += ' |'\n",
" text += '\\n'\n",
" text += '\\n'\n",
" text += '[' + curie + '](#' + curie_anchor + ') |\\n'\n",
" text = text[:len(text)-2] # remove final trailing vertical bar and newline\n",
" text += '\\n\\n' # put back removed newline\n",
"\n",
"index_by_name = text\n",
"\n",
"#print(index_by_name)\n",
@ -272,18 +240,9 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generating term index by label\n",
"\n"
]
}
],
"outputs": [],
"source": [
"# ---------------\n",
"# generate the index of terms by label\n",
@ -301,11 +260,9 @@
"for row_index,row in terms_sorted_by_label.iterrows():\n",
" if row['rdf_type'] == 'http://www.w3.org/2000/01/rdf-schema#Class':\n",
" curie_anchor = row['pref_ns_prefix'] + \"_\" + row['term_localName']\n",
" text += '[' + row['label'] + '](#' + curie_anchor + ')'\n",
" if row_index < len(terms_sorted_by_label) - 1:\n",
" text += ' |'\n",
" text += '\\n'\n",
"text += '\\n'\n",
" text += '[' + row['label'] + '](#' + curie_anchor + ') |\\n'\n",
"text = text[:len(text)-2] # remove final trailing vertical bar and newline\n",
"text += '\\n\\n' # put back removed newline\n",
"\n",
"for category in range(0,len(display_order)):\n",
" if organized_in_categories:\n",
@ -320,11 +277,10 @@
" if row_index == 0 or (row_index != 0 and row['label'] != filtered_table.iloc[row_index - 1].loc['label']): # this is a hack to prevent duplicate labels\n",
" if row['rdf_type'] != 'http://www.w3.org/2000/01/rdf-schema#Class':\n",
" curie_anchor = row['pref_ns_prefix'] + \"_\" + row['term_localName']\n",
" text += '[' + row['label'] + '](#' + curie_anchor + ')'\n",
" if row_index < len(filtered_table) - 2 or (row_index == len(filtered_table) - 2 and row['label'] != filtered_table.iloc[row_index + 1].loc['label']):\n",
" text += ' |'\n",
" text += '\\n'\n",
" text += '\\n'\n",
" text += '[' + row['label'] + '](#' + curie_anchor + ') |\\n'\n",
" text = text[:len(text)-2] # remove final trailing vertical bar and newline\n",
" text += '\\n\\n' # put back removed newline\n",
"\n",
"index_by_label = text\n",
"print()\n",
"\n",
@ -333,19 +289,9 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generating terms table\n",
"done generating\n",
"\n"
]
}
],
"outputs": [],
"source": [
"decisions_df = pd.read_csv('https://raw.githubusercontent.com/tdwg/rs.tdwg.org/master/decisions/decisions-links.csv', na_filter=False)\n",
"\n",
@ -384,6 +330,7 @@
" uri = row['pref_ns_uri'] + row['term_localName']\n",
" text += '\\t\\t\\t<td><a href=\"' + uri + '\">' + uri + '</a></td>\\n'\n",
" text += '\\t\\t</tr>\\n'\n",
" text += '\\t\\t<tr>\\n'\n",
" text += '\\t\\t\\t<td>Modified</td>\\n'\n",
" text += '\\t\\t\\t<td>' + row['term_modified'] + '</td>\\n'\n",
" text += '\\t\\t</tr>\\n'\n",
@ -496,17 +443,9 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Merging term table with header and footer and saving file\n"
]
}
],
"outputs": [],
"source": [
"# ---------------\n",
"# Merge term table with header and footer Markdown, then save file\n",
@ -519,17 +458,9 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"done\n"
]
}
],
"outputs": [],
"source": [
"# read in header and footer, merge with terms table, and output\n",
"\n",

File diff suppressed because it is too large Load Diff