mirror of https://github.com/tdwg/dwc.git
Implement patched link/code tagging in all build scripts
This commit is contained in:
parent
686fb33dd8
commit
3d82ba9f41
|
@ -57,7 +57,7 @@ display_id = ['record_level', 'dc', 'dcterms', 'occurrence', 'organism', 'materi
|
||||||
# ---------------
|
# ---------------
|
||||||
|
|
||||||
# replace URL with link (function used with Audubon Core list of terms build script)
|
# replace URL with link (function used with Audubon Core list of terms build script)
|
||||||
# Does not correctly handle URLs with close parens ) characters.
|
# Does not correctly handle URLs with close parens ) characters, so no longer used.
|
||||||
#
|
#
|
||||||
def createLinks(text):
|
def createLinks(text):
|
||||||
def repl(match):
|
def repl(match):
|
||||||
|
@ -69,15 +69,13 @@ def createLinks(text):
|
||||||
result = re.sub(pattern, repl, text)
|
result = re.sub(pattern, repl, text)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# 2021-08-05 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey
|
# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey
|
||||||
def convert_code(text_with_backticks):
|
def convert_code(text_with_backticks):
|
||||||
"""Takes all back-quoted sections in a text field and converts it to
|
"""Takes all back-quoted sections in a text field and converts it to
|
||||||
the html tagged version of code blocks <code>...</code>
|
the html tagged version of code blocks <code>...</code>
|
||||||
"""
|
"""
|
||||||
return re.sub(r'`([^`]*)`', r'<code>\1</code>', text_with_backticks)
|
return re.sub(r'`([^`]*)`', r'<code>\1</code>', text_with_backticks)
|
||||||
|
|
||||||
# 2021-08-06 Discovered when using this with Audubon Core list of terms build script that it does not
|
|
||||||
# correctly handle trailing commas that follow a URL. I don't understand the regex well enough to fix it
|
|
||||||
def convert_link(text_with_urls):
|
def convert_link(text_with_urls):
|
||||||
"""Takes all links in a text field and converts it to the html tagged
|
"""Takes all links in a text field and converts it to the html tagged
|
||||||
version of the link
|
version of the link
|
||||||
|
@ -87,7 +85,7 @@ def convert_link(text_with_urls):
|
||||||
url = inputstring.group()
|
url = inputstring.group()
|
||||||
return "<a href=\"{}\">{}</a>".format(url, url)
|
return "<a href=\"{}\">{}</a>".format(url, url)
|
||||||
|
|
||||||
regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.])"
|
regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.,])"
|
||||||
return re.sub(regx, _handle_matched, text_with_urls)
|
return re.sub(regx, _handle_matched, text_with_urls)
|
||||||
|
|
||||||
# ---------------
|
# ---------------
|
||||||
|
|
|
@ -170,7 +170,7 @@ class DwcDigester(object):
|
||||||
url = inputstring.group()
|
url = inputstring.group()
|
||||||
return "<a href=\"{}\">{}</a>".format(url, url)
|
return "<a href=\"{}\">{}</a>".format(url, url)
|
||||||
|
|
||||||
regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.])"
|
regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.,])"
|
||||||
return re.sub(regx, _handle_matched, text_with_urls)
|
return re.sub(regx, _handle_matched, text_with_urls)
|
||||||
|
|
||||||
def process_terms(self):
|
def process_terms(self):
|
||||||
|
|
|
@ -65,7 +65,26 @@
|
||||||
"\n",
|
"\n",
|
||||||
" pattern = '(https?://[^\\s,;\\)\"]*)'\n",
|
" pattern = '(https?://[^\\s,;\\)\"]*)'\n",
|
||||||
" result = re.sub(pattern, repl, text)\n",
|
" result = re.sub(pattern, repl, text)\n",
|
||||||
" return result"
|
" return result\n",
|
||||||
|
"\n",
|
||||||
|
"# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey\n",
|
||||||
|
"def convert_code(text_with_backticks):\n",
|
||||||
|
" \"\"\"Takes all back-quoted sections in a text field and converts it to\n",
|
||||||
|
" the html tagged version of code blocks <code>...</code>\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" return re.sub(r'`([^`]*)`', r'<code>\\1</code>', text_with_backticks)\n",
|
||||||
|
"\n",
|
||||||
|
"def convert_link(text_with_urls):\n",
|
||||||
|
" \"\"\"Takes all links in a text field and converts it to the html tagged\n",
|
||||||
|
" version of the link\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" def _handle_matched(inputstring):\n",
|
||||||
|
" \"\"\"quick hack version of url handling on the current prime versions data\"\"\"\n",
|
||||||
|
" url = inputstring.group()\n",
|
||||||
|
" return \"<a href=\\\"{}\\\">{}</a>\".format(url, url)\n",
|
||||||
|
"\n",
|
||||||
|
" regx = \"(http[s]?://[\\w\\d:#@%/;$()~_?\\+-;=\\\\\\.&]*)(?<![\\)\\.,])\"\n",
|
||||||
|
" return re.sub(regx, _handle_matched, text_with_urls)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -300,13 +319,13 @@
|
||||||
" if row['usage'] != '':\n",
|
" if row['usage'] != '':\n",
|
||||||
" text += '\\t\\t<tr>\\n'\n",
|
" text += '\\t\\t<tr>\\n'\n",
|
||||||
" text += '\\t\\t\\t<td>Usage</td>\\n'\n",
|
" text += '\\t\\t\\t<td>Usage</td>\\n'\n",
|
||||||
" text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n",
|
" text += '\\t\\t\\t<td>' + convert_link(convert_code(row['usage'])) + '</td>\\n'\n",
|
||||||
" text += '\\t\\t</tr>\\n'\n",
|
" text += '\\t\\t</tr>\\n'\n",
|
||||||
"\n",
|
"\n",
|
||||||
" if row['notes'] != '':\n",
|
" if row['notes'] != '':\n",
|
||||||
" text += '\\t\\t<tr>\\n'\n",
|
" text += '\\t\\t<tr>\\n'\n",
|
||||||
" text += '\\t\\t\\t<td>Notes</td>\\n'\n",
|
" text += '\\t\\t\\t<td>Notes</td>\\n'\n",
|
||||||
" text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n",
|
" text += '\\t\\t\\t<td>' + convert_link(convert_code(row['notes'])) + '</td>\\n'\n",
|
||||||
" text += '\\t\\t</tr>\\n'\n",
|
" text += '\\t\\t</tr>\\n'\n",
|
||||||
"\n",
|
"\n",
|
||||||
" if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
|
" if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
|
||||||
|
@ -416,7 +435,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.7.1"
|
"version": "3.8.5"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|
|
@ -65,7 +65,26 @@
|
||||||
"\n",
|
"\n",
|
||||||
" pattern = '(https?://[^\\s,;\\)\"]*)'\n",
|
" pattern = '(https?://[^\\s,;\\)\"]*)'\n",
|
||||||
" result = re.sub(pattern, repl, text)\n",
|
" result = re.sub(pattern, repl, text)\n",
|
||||||
" return result"
|
" return result\n",
|
||||||
|
"\n",
|
||||||
|
"# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey\n",
|
||||||
|
"def convert_code(text_with_backticks):\n",
|
||||||
|
" \"\"\"Takes all back-quoted sections in a text field and converts it to\n",
|
||||||
|
" the html tagged version of code blocks <code>...</code>\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" return re.sub(r'`([^`]*)`', r'<code>\\1</code>', text_with_backticks)\n",
|
||||||
|
"\n",
|
||||||
|
"def convert_link(text_with_urls):\n",
|
||||||
|
" \"\"\"Takes all links in a text field and converts it to the html tagged\n",
|
||||||
|
" version of the link\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" def _handle_matched(inputstring):\n",
|
||||||
|
" \"\"\"quick hack version of url handling on the current prime versions data\"\"\"\n",
|
||||||
|
" url = inputstring.group()\n",
|
||||||
|
" return \"<a href=\\\"{}\\\">{}</a>\".format(url, url)\n",
|
||||||
|
"\n",
|
||||||
|
" regx = \"(http[s]?://[\\w\\d:#@%/;$()~_?\\+-;=\\\\\\.&]*)(?<![\\)\\.,])\"\n",
|
||||||
|
" return re.sub(regx, _handle_matched, text_with_urls)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -300,13 +319,13 @@
|
||||||
" if row['usage'] != '':\n",
|
" if row['usage'] != '':\n",
|
||||||
" text += '\\t\\t<tr>\\n'\n",
|
" text += '\\t\\t<tr>\\n'\n",
|
||||||
" text += '\\t\\t\\t<td>Usage</td>\\n'\n",
|
" text += '\\t\\t\\t<td>Usage</td>\\n'\n",
|
||||||
" text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n",
|
" text += '\\t\\t\\t<td>' + convert_link(convert_code(row['usage'])) + '</td>\\n'\n",
|
||||||
" text += '\\t\\t</tr>\\n'\n",
|
" text += '\\t\\t</tr>\\n'\n",
|
||||||
"\n",
|
"\n",
|
||||||
" if row['notes'] != '':\n",
|
" if row['notes'] != '':\n",
|
||||||
" text += '\\t\\t<tr>\\n'\n",
|
" text += '\\t\\t<tr>\\n'\n",
|
||||||
" text += '\\t\\t\\t<td>Notes</td>\\n'\n",
|
" text += '\\t\\t\\t<td>Notes</td>\\n'\n",
|
||||||
" text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n",
|
" text += '\\t\\t\\t<td>' + convert_link(convert_code(row['notes'])) + '</td>\\n'\n",
|
||||||
" text += '\\t\\t</tr>\\n'\n",
|
" text += '\\t\\t</tr>\\n'\n",
|
||||||
"\n",
|
"\n",
|
||||||
" if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
|
" if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
|
||||||
|
@ -416,7 +435,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.7.1"
|
"version": "3.8.5"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|
|
@ -65,7 +65,26 @@
|
||||||
"\n",
|
"\n",
|
||||||
" pattern = '(https?://[^\\s,;\\)\"]*)'\n",
|
" pattern = '(https?://[^\\s,;\\)\"]*)'\n",
|
||||||
" result = re.sub(pattern, repl, text)\n",
|
" result = re.sub(pattern, repl, text)\n",
|
||||||
" return result"
|
" return result\n",
|
||||||
|
"\n",
|
||||||
|
"# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey\n",
|
||||||
|
"def convert_code(text_with_backticks):\n",
|
||||||
|
" \"\"\"Takes all back-quoted sections in a text field and converts it to\n",
|
||||||
|
" the html tagged version of code blocks <code>...</code>\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" return re.sub(r'`([^`]*)`', r'<code>\\1</code>', text_with_backticks)\n",
|
||||||
|
"\n",
|
||||||
|
"def convert_link(text_with_urls):\n",
|
||||||
|
" \"\"\"Takes all links in a text field and converts it to the html tagged\n",
|
||||||
|
" version of the link\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" def _handle_matched(inputstring):\n",
|
||||||
|
" \"\"\"quick hack version of url handling on the current prime versions data\"\"\"\n",
|
||||||
|
" url = inputstring.group()\n",
|
||||||
|
" return \"<a href=\\\"{}\\\">{}</a>\".format(url, url)\n",
|
||||||
|
"\n",
|
||||||
|
" regx = \"(http[s]?://[\\w\\d:#@%/;$()~_?\\+-;=\\\\\\.&]*)(?<![\\)\\.,])\"\n",
|
||||||
|
" return re.sub(regx, _handle_matched, text_with_urls)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -300,13 +319,13 @@
|
||||||
" if row['usage'] != '':\n",
|
" if row['usage'] != '':\n",
|
||||||
" text += '\\t\\t<tr>\\n'\n",
|
" text += '\\t\\t<tr>\\n'\n",
|
||||||
" text += '\\t\\t\\t<td>Usage</td>\\n'\n",
|
" text += '\\t\\t\\t<td>Usage</td>\\n'\n",
|
||||||
" text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n",
|
" text += '\\t\\t\\t<td>' + convert_link(convert_code(row['usage'])) + '</td>\\n'\n",
|
||||||
" text += '\\t\\t</tr>\\n'\n",
|
" text += '\\t\\t</tr>\\n'\n",
|
||||||
"\n",
|
"\n",
|
||||||
" if row['notes'] != '':\n",
|
" if row['notes'] != '':\n",
|
||||||
" text += '\\t\\t<tr>\\n'\n",
|
" text += '\\t\\t<tr>\\n'\n",
|
||||||
" text += '\\t\\t\\t<td>Notes</td>\\n'\n",
|
" text += '\\t\\t\\t<td>Notes</td>\\n'\n",
|
||||||
" text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n",
|
" text += '\\t\\t\\t<td>' + convert_link(convert_code(row['notes'])) + '</td>\\n'\n",
|
||||||
" text += '\\t\\t</tr>\\n'\n",
|
" text += '\\t\\t</tr>\\n'\n",
|
||||||
"\n",
|
"\n",
|
||||||
" if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
|
" if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
|
||||||
|
@ -416,7 +435,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.7.1"
|
"version": "3.8.5"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|
Loading…
Reference in New Issue