diff --git a/build/doe-cv-build/doe_build.py b/build/doe-cv-build/doe_build.py new file mode 100644 index 0000000..9ac35db --- /dev/null +++ b/build/doe-cv-build/doe_build.py @@ -0,0 +1,325 @@ +# Script to build Markdown pages that provide term metadata for simple vocabularies +# Steve Baskauf 2020-06-28 CC0 +# This script merges static Markdown header and footer documents with term information tables (in Markdown) generated from data in the rs.tdwg.org repo from the TDWG GitHub site + +# Note: this script calls a function from http_library.py, which requires importing the requests, csv, and json modules +import re +import requests # best library to manage HTTP transactions +import csv # library to read/write/parse CSV files +import json # library to convert JSON to Python data structures +import pandas as pd + +# ----------------- +# Configuration section +# ----------------- + +# !!!! Note !!!! +# This is an example of a simple vocabulary without categories. For a complex example +# with multiple namespaces and several categories, see build-page-categories.ipynb + +# This is the base URL for raw files from the branch of the repo that has been pushed to GitHub. In this example, +# the branch is named "master" +githubBaseUri = 'https://raw.githubusercontent.com/tdwg/rs.tdwg.org/master/' + +headerFileName = 'termlist-header.md' +footerFileName = 'termlist-footer.md' +outFileName = '../../docs/doe/index.md' + +# This is a Python list of the database names of the term lists to be included in the document. +termLists = ['degreeOfEstablishment'] + +# NOTE! There may be problems unless every term list is of the same vocabulary type since the number of columns will differ +# However, there probably aren't any circumstances where mixed types will be used to generate the same page. +vocab_type = 2 # 1 is simple vocabulary, 2 is simple controlled vocabulary, 3 is c.v. 
with broader hierarchy + +# Terms in large vocabularies like Darwin and Audubon Cores may be organized into categories using tdwgutility_organizedInClass +# If so, those categories can be used to group terms in the generated term list document. +organized_in_categories = False + +# If organized in categories, the display_order list must contain the IRIs that are values of tdwgutility_organizedInClass +# If not organized into categories, the value is irrelevant. There just needs to be one item in the list. +display_order = [''] +display_label = ['Vocabulary'] # these are the section labels for the categories in the page +display_comments = [''] # these are the comments about the category to be appended following the section labels +display_id = ['Vocabulary'] # these are the fragment identifiers for the associated sections for the categories + +# --------------- +# Function definitions +# --------------- + +# replace URL with link +# +def createLinks(text): + def repl(match): + if match.group(1)[-1] == '.': + return '' + match.group(1)[:-1] + '.' + return '' + match.group(1) + '' + + pattern = '(https?://[^\s,;\)"]*)' + result = re.sub(pattern, repl, text) + return result + +# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey +def convert_code(text_with_backticks): + """Takes all back-quoted sections in a text field and converts it to + the html tagged version of code blocks ... 
+ """ + return re.sub(r'`([^`]*)`', r'\1', text_with_backticks) + +def convert_link(text_with_urls): + """Takes all links in a text field and converts it to the html tagged + version of the link + """ + def _handle_matched(inputstring): + """quick hack version of url handling on the current prime versions data""" + url = inputstring.group() + return "{}".format(url, url) + + regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?\n' + curie = row['pref_ns_prefix'] + ":" + row['term_localName'] + curieAnchor = curie.replace(':','_') + text += '\t\n' + text += '\t\t\n' + text += '\t\t\tTerm Name ' + curie + '\n' + text += '\t\t\n' + text += '\t\n' + text += '\t\n' + text += '\t\t\n' + text += '\t\t\tTerm IRI\n' + uri = row['pref_ns_uri'] + row['term_localName'] + text += '\t\t\t' + uri + '\n' + text += '\t\t\n' + text += '\t\t\n' + text += '\t\t\tModified\n' + text += '\t\t\t' + row['term_modified'] + '\n' + text += '\t\t\n' + + if row['version_iri'] != '': + text += '\t\t\n' + text += '\t\t\tTerm version IRI\n' + text += '\t\t\t' + row['version_iri'] + '\n' + text += '\t\t\n' + + text += '\t\t\n' + text += '\t\t\tLabel\n' + text += '\t\t\t' + row['label'] + '\n' + text += '\t\t\n' + + if row['term_deprecated'] != '': + text += '\t\t\n' + text += '\t\t\t\n' + text += '\t\t\tThis term is deprecated and should no longer be used.\n' + text += '\t\t\n' + + text += '\t\t\n' + text += '\t\t\tDefinition\n' + text += '\t\t\t' + row['definition'] + '\n' + text += '\t\t\n' + + if row['usage'] != '': + text += '\t\t\n' + text += '\t\t\tUsage\n' + text += '\t\t\t' + convert_link(convert_code(row['usage'])) + '\n' + text += '\t\t\n' + + if row['notes'] != '': + text += '\t\t\n' + text += '\t\t\tNotes\n' + text += '\t\t\t' + convert_link(convert_code(row['notes'])) + '\n' + text += '\t\t\n' + + if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary + text += '\t\t\n' + text += '\t\t\tControlled value\n' + text += '\t\t\t' + 
row['controlled_value_string'] + '\n' + text += '\t\t\n' + + if vocab_type == 3 and row['skos_broader'] != '': # controlled vocabulary with skos:broader relationships + text += '\t\t\n' + text += '\t\t\tHas broader concept\n' + curieAnchor = row['skos_broader'].replace(':','_') + text += '\t\t\t' + row['skos_broader'] + '\n' + text += '\t\t\n' + + text += '\t\t\n' + text += '\t\t\tType\n' + if row['type'] == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property': + text += '\t\t\tProperty\n' + elif row['type'] == 'http://www.w3.org/2000/01/rdf-schema#Class': + text += '\t\t\tClass\n' + elif row['type'] == 'http://www.w3.org/2004/02/skos/core#Concept': + text += '\t\t\tConcept\n' + else: + text += '\t\t\t' + row['type'] + '\n' # this should rarely happen + text += '\t\t\n' + + # Look up decisions related to this term + for drow_index,drow in decisions_df.iterrows(): + if drow['linked_affected_resource'] == uri: + text += '\t\t\n' + text += '\t\t\tExecutive Committee decision\n' + text += '\t\t\thttp://rs.tdwg.org/decisions/' + drow['decision_localName'] + '\n' + text += '\t\t\n' + + text += '\t\n' + text += '\n' + text += '\n' + text += '\n' +term_table = text + +text = index_by_label + term_table + +# read in header and footer, merge with terms table, and output + +headerObject = open(headerFileName, 'rt', encoding='utf-8') +header = headerObject.read() +headerObject.close() + +footerObject = open(footerFileName, 'rt', encoding='utf-8') +footer = footerObject.read() +footerObject.close() + +output = header + text + footer +outputObject = open(outFileName, 'wt', encoding='utf-8') +outputObject.write(output) +outputObject.close() + +print('done') diff --git a/docs/doe/index.md b/docs/doe/index.md index 49eec8f..8fe83cd 100644 --- a/docs/doe/index.md +++ b/docs/doe/index.md @@ -120,11 +120,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Modified - 2020-10-13 + 2021-09-01 Term version IRI - 
http://rs.tdwg.org/dwcdoe/values/version/d001-2020-10-13 + http://rs.tdwg.org/dwcdoe/values/version/d001-2021-09-01 Label @@ -132,11 +132,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Definition - Not transported beyond limits of native range + Not transported beyond limits of native range. Notes - Considered native and naturally occuring. See also Blackburn et al. 2011 https://doi.org/10.1016/j.tree.2011.03.023 category A + Considered native and naturally occurring. See also "category A" in Blackburn et al. 2011. https://doi.org/10.1016/j.tree.2011.03.023 Controlled value @@ -166,11 +166,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Modified - 2020-10-13 + 2021-09-01 Term version IRI - http://rs.tdwg.org/dwcdoe/values/version/d002-2020-10-13 + http://rs.tdwg.org/dwcdoe/values/version/d002-2021-09-01 Label @@ -178,15 +178,15 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Definition - Individuals in captivity or quarantine (i.e. individuals provided with conditions suitable for them, but explicit measures of containment are in place) + Individuals in captivity or quarantine (i.e., individuals provided with conditions suitable for them, but explicit measures of containment are in place). Usage - Only for cases where specific actions have been taken place to prevent escape of individuals or propagules + Only for cases where specific actions have been taken place to prevent escape of individuals or propagules. Notes - See also Blackburn et al. 2011 https://doi.org/10.1016/j.tree.2011.03.023 category B1 + See also "category B1" in Blackburn et al. 2011. 
https://doi.org/10.1016/j.tree.2011.03.023 Controlled value @@ -216,11 +216,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Modified - 2020-10-13 + 2021-09-01 Term version IRI - http://rs.tdwg.org/dwcdoe/values/version/d003-2020-10-13 + http://rs.tdwg.org/dwcdoe/values/version/d003-2021-09-01 Label @@ -228,11 +228,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Definition - Individuals in cultivation (i.e. individuals provided with conditions suitable for them, but explicit measures to prevent dispersal are limited at best) + Individuals in cultivation (i.e., individuals provided with conditions suitable for them, but explicit measures to prevent dispersal are limited at best). Notes - Examples include gardens, parks and farms. See also Blackburn et al. 2011 https://doi.org/10.1016/j.tree.2011.03.023 category B2 + Examples include gardens, parks and farms. See also "category B2" in Blackburn et al. 2011. https://doi.org/10.1016/j.tree.2011.03.023 Controlled value @@ -262,11 +262,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Modified - 2020-10-13 + 2021-09-01 Term version IRI - http://rs.tdwg.org/dwcdoe/values/version/d004-2020-10-13 + http://rs.tdwg.org/dwcdoe/values/version/d004-2021-09-01 Label @@ -274,11 +274,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Definition - Individuals directly released into novel environment + Individuals directly released into novel environment. Notes - For example, fish stocked for angling, birds for hunting. See also Blackburn et al. 2011 https://doi.org/10.1016/j.tree.2011.03.023 category B3 + For example, fish stocked for angling, birds for hunting. See also "category B3" in Blackburn et al. 2011. 
https://doi.org/10.1016/j.tree.2011.03.023 Controlled value @@ -308,11 +308,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Modified - 2020-10-13 + 2021-09-01 Term version IRI - http://rs.tdwg.org/dwcdoe/values/version/d005-2020-10-13 + http://rs.tdwg.org/dwcdoe/values/version/d005-2021-09-01 Label @@ -320,11 +320,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Definition - Individuals released outside of captivity or cultivation in a location, but incapable of surviving for a significant period + Individuals released outside of captivity or cultivation in a location, but incapable of surviving for a significant period. Notes - Such as frost tender plants sown or planted in a cold climate. See also Blackburn et al. 2011 https://doi.org/10.1016/j.tree.2011.03.023 category C0 + For example, frost-tender plants sown or planted in a cold climate. See also "category C0" in Blackburn et al. 2011. https://doi.org/10.1016/j.tree.2011.03.023 Controlled value @@ -354,11 +354,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Modified - 2020-10-13 + 2021-09-01 Term version IRI - http://rs.tdwg.org/dwcdoe/values/version/d006-2020-10-13 + http://rs.tdwg.org/dwcdoe/values/version/d006-2021-09-01 Label @@ -366,11 +366,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Definition - Individuals surviving outside of captivity or cultivation in a location, no reproduction + Individuals surviving outside of captivity or cultivation in a location with no reproduction. Notes - Trees planted in the wild for forestry or ornament may come under this category. See also Blackburn et al. 2011 https://doi.org/10.1016/j.tree.2011.03.023 category C1 + Trees planted in the wild for forestry or ornament may come under this category. See also "category C1" in Blackburn et al. 2011. 
https://doi.org/10.1016/j.tree.2011.03.023 Controlled value @@ -400,11 +400,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Modified - 2020-10-13 + 2021-09-01 Term version IRI - http://rs.tdwg.org/dwcdoe/values/version/d007-2020-10-13 + http://rs.tdwg.org/dwcdoe/values/version/d007-2021-09-01 Label @@ -412,11 +412,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Definition - Individuals surviving outside of captivity or cultivation in a location, reproduction is occurring, but population not self-sustaining + Individuals surviving outside of captivity or cultivation in a location. Reproduction is occurring, but population not self-sustaining. Notes - Offspring are produced, but these either do not survive or are fertile enough to maintain the population. See also Blackburn et al. 2011 https://doi.org/10.1016/j.tree.2011.03.023 category C2 + Offspring are produced, but these either do not survive or are not fertile enough to maintain the population. See also "category C2" in Blackburn et al. 2011. https://doi.org/10.1016/j.tree.2011.03.023 Controlled value @@ -446,11 +446,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Modified - 2020-10-13 + 2021-09-01 Term version IRI - http://rs.tdwg.org/dwcdoe/values/version/d008-2020-10-13 + http://rs.tdwg.org/dwcdoe/values/version/d008-2021-09-01 Label @@ -458,11 +458,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Definition - Individuals surviving outside of captivity or cultivation in a location, reproduction occurring, and population self-sustaining + Individuals surviving outside of captivity or cultivation in a location. Reproduction occurring, and population self-sustaining. Notes - The population is maintained by reproduction, but is not spreading. See also Blackburn et al. 
2011 https://doi.org/10.1016/j.tree.2011.03.023 category C3 + The population is maintained by reproduction, but is not spreading. See also "category C3" in Blackburn et al. 2011. https://doi.org/10.1016/j.tree.2011.03.023 Controlled value @@ -492,11 +492,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Modified - 2020-10-13 + 2021-09-01 Term version IRI - http://rs.tdwg.org/dwcdoe/values/version/d009-2020-10-13 + http://rs.tdwg.org/dwcdoe/values/version/d009-2021-09-01 Label @@ -504,11 +504,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Definition - Self-sustaining population outside of captivity or cultivation, with individuals surviving a significant distance from the original point of introduction + Self-sustaining population outside of captivity or cultivation, with individuals surviving a significant distance from the original point of introduction. Notes - The population is maintained by reproduction and is spreading. See also Blackburn et al. 2011 https://doi.org/10.1016/j.tree.2011.03.023 category D1 + The population is maintained by reproduction and is spreading. See also "category D1" in Blackburn et al. 2011. 
https://doi.org/10.1016/j.tree.2011.03.023 Controlled value @@ -538,11 +538,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Modified - 2020-10-13 + 2021-09-01 Term version IRI - http://rs.tdwg.org/dwcdoe/values/version/d010-2020-10-13 + http://rs.tdwg.org/dwcdoe/values/version/d010-2021-09-01 Label @@ -550,11 +550,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Definition - Self-sustaining population outside of captivity or cultivation, with individuals surviving and reproducing a significant distance from the original point of introduction + Self-sustaining population outside of captivity or cultivation, with individuals surviving and reproducing a significant distance from the original point of introduction. Notes - The population is maintained by reproduction, is spreading, and their progeny is also reproducing and spreading. See also Blackburn et al. 2011 https://doi.org/10.1016/j.tree.2011.03.023 category D2 + The population is maintained by reproduction, is spreading, and its progeny are also reproducing and spreading. See also "category D2" in Blackburn et al. 2011. https://doi.org/10.1016/j.tree.2011.03.023 Controlled value @@ -584,11 +584,11 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Modified - 2020-10-13 + 2021-09-01 Term version IRI - http://rs.tdwg.org/dwcdoe/values/version/d011-2020-10-13 + http://rs.tdwg.org/dwcdoe/values/version/d011-2021-09-01 Label @@ -596,15 +596,15 @@ Due to the requirements of [Section 1.4.3 of the Darwin Core RDF Guide](http://r Definition - Fully invasive species, with individuals dispersing, surviving and reproducing at multiple sites across a greater or lesser spectrum of habitats and extent of occurrence + Fully invasive species, with individuals dispersing, surviving and reproducing at multiple sites across a spectrum of habitats and geographic range. 
Usage - This term is only used for those invasives with the highest degree of encroachment + This term is only used for those invasives with the highest degree of encroachment. Notes - See also Blackburn et al. 2011 https://doi.org/10.1016/j.tree.2011.03.023 category E + See also "category E" in Blackburn et al. 2011. https://doi.org/10.1016/j.tree.2011.03.023 Controlled value