Implement patched link/code tagging in all build scripts

This commit is contained in:
Steve Baskauf 2021-08-06 13:19:01 -05:00
parent 686fb33dd8
commit 3d82ba9f41
5 changed files with 73 additions and 18 deletions

View File

@ -57,7 +57,7 @@ display_id = ['record_level', 'dc', 'dcterms', 'occurrence', 'organism', 'materi
# --------------- # ---------------
# replace URL with link (function used with Audubon Core list of terms build script) # replace URL with link (function used with Audubon Core list of terms build script)
# Does not correctly handle URLs with close parens ) characters. # Does not correctly handle URLs with close parens ) characters, so no longer used.
# #
def createLinks(text): def createLinks(text):
def repl(match): def repl(match):
@ -69,15 +69,13 @@ def createLinks(text):
result = re.sub(pattern, repl, text) result = re.sub(pattern, repl, text)
return result return result
# 2021-08-05 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey # 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey
def convert_code(text_with_backticks): def convert_code(text_with_backticks):
"""Takes all back-quoted sections in a text field and converts it to """Takes all back-quoted sections in a text field and converts it to
the html tagged version of code blocks <code>...</code> the html tagged version of code blocks <code>...</code>
""" """
return re.sub(r'`([^`]*)`', r'<code>\1</code>', text_with_backticks) return re.sub(r'`([^`]*)`', r'<code>\1</code>', text_with_backticks)
# 2021-08-06 Discovered when using this with Audubon Core list of terms build script that it does not
# correctly handle trailing commas that follow a URL. I don't understand the regex well enough to fix it
def convert_link(text_with_urls): def convert_link(text_with_urls):
"""Takes all links in a text field and converts it to the html tagged """Takes all links in a text field and converts it to the html tagged
version of the link version of the link
@ -87,7 +85,7 @@ def convert_link(text_with_urls):
url = inputstring.group() url = inputstring.group()
return "<a href=\"{}\">{}</a>".format(url, url) return "<a href=\"{}\">{}</a>".format(url, url)
regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.])" regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.,])"
return re.sub(regx, _handle_matched, text_with_urls) return re.sub(regx, _handle_matched, text_with_urls)
# --------------- # ---------------

View File

@ -170,7 +170,7 @@ class DwcDigester(object):
url = inputstring.group() url = inputstring.group()
return "<a href=\"{}\">{}</a>".format(url, url) return "<a href=\"{}\">{}</a>".format(url, url)
regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.])" regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.,])"
return re.sub(regx, _handle_matched, text_with_urls) return re.sub(regx, _handle_matched, text_with_urls)
def process_terms(self): def process_terms(self):

View File

@ -65,7 +65,26 @@
"\n", "\n",
" pattern = '(https?://[^\\s,;\\)\"]*)'\n", " pattern = '(https?://[^\\s,;\\)\"]*)'\n",
" result = re.sub(pattern, repl, text)\n", " result = re.sub(pattern, repl, text)\n",
" return result" " return result\n",
"\n",
"# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey\n",
"def convert_code(text_with_backticks):\n",
" \"\"\"Takes all back-quoted sections in a text field and converts it to\n",
" the html tagged version of code blocks <code>...</code>\n",
" \"\"\"\n",
" return re.sub(r'`([^`]*)`', r'<code>\\1</code>', text_with_backticks)\n",
"\n",
"def convert_link(text_with_urls):\n",
" \"\"\"Takes all links in a text field and converts it to the html tagged\n",
" version of the link\n",
" \"\"\"\n",
" def _handle_matched(inputstring):\n",
" \"\"\"quick hack version of url handling on the current prime versions data\"\"\"\n",
" url = inputstring.group()\n",
" return \"<a href=\\\"{}\\\">{}</a>\".format(url, url)\n",
"\n",
" regx = \"(http[s]?://[\\w\\d:#@%/;$()~_?\\+-;=\\\\\\.&]*)(?<![\\)\\.,])\"\n",
" return re.sub(regx, _handle_matched, text_with_urls)\n"
] ]
}, },
{ {
@ -300,13 +319,13 @@
" if row['usage'] != '':\n", " if row['usage'] != '':\n",
" text += '\\t\\t<tr>\\n'\n", " text += '\\t\\t<tr>\\n'\n",
" text += '\\t\\t\\t<td>Usage</td>\\n'\n", " text += '\\t\\t\\t<td>Usage</td>\\n'\n",
" text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n", " text += '\\t\\t\\t<td>' + convert_link(convert_code(row['usage'])) + '</td>\\n'\n",
" text += '\\t\\t</tr>\\n'\n", " text += '\\t\\t</tr>\\n'\n",
"\n", "\n",
" if row['notes'] != '':\n", " if row['notes'] != '':\n",
" text += '\\t\\t<tr>\\n'\n", " text += '\\t\\t<tr>\\n'\n",
" text += '\\t\\t\\t<td>Notes</td>\\n'\n", " text += '\\t\\t\\t<td>Notes</td>\\n'\n",
" text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n", " text += '\\t\\t\\t<td>' + convert_link(convert_code(row['notes'])) + '</td>\\n'\n",
" text += '\\t\\t</tr>\\n'\n", " text += '\\t\\t</tr>\\n'\n",
"\n", "\n",
" if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n", " if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
@ -416,7 +435,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.7.1" "version": "3.8.5"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@ -65,7 +65,26 @@
"\n", "\n",
" pattern = '(https?://[^\\s,;\\)\"]*)'\n", " pattern = '(https?://[^\\s,;\\)\"]*)'\n",
" result = re.sub(pattern, repl, text)\n", " result = re.sub(pattern, repl, text)\n",
" return result" " return result\n",
"\n",
"# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey\n",
"def convert_code(text_with_backticks):\n",
" \"\"\"Takes all back-quoted sections in a text field and converts it to\n",
" the html tagged version of code blocks <code>...</code>\n",
" \"\"\"\n",
" return re.sub(r'`([^`]*)`', r'<code>\\1</code>', text_with_backticks)\n",
"\n",
"def convert_link(text_with_urls):\n",
" \"\"\"Takes all links in a text field and converts it to the html tagged\n",
" version of the link\n",
" \"\"\"\n",
" def _handle_matched(inputstring):\n",
" \"\"\"quick hack version of url handling on the current prime versions data\"\"\"\n",
" url = inputstring.group()\n",
" return \"<a href=\\\"{}\\\">{}</a>\".format(url, url)\n",
"\n",
" regx = \"(http[s]?://[\\w\\d:#@%/;$()~_?\\+-;=\\\\\\.&]*)(?<![\\)\\.,])\"\n",
" return re.sub(regx, _handle_matched, text_with_urls)\n"
] ]
}, },
{ {
@ -300,13 +319,13 @@
" if row['usage'] != '':\n", " if row['usage'] != '':\n",
" text += '\\t\\t<tr>\\n'\n", " text += '\\t\\t<tr>\\n'\n",
" text += '\\t\\t\\t<td>Usage</td>\\n'\n", " text += '\\t\\t\\t<td>Usage</td>\\n'\n",
" text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n", " text += '\\t\\t\\t<td>' + convert_link(convert_code(row['usage'])) + '</td>\\n'\n",
" text += '\\t\\t</tr>\\n'\n", " text += '\\t\\t</tr>\\n'\n",
"\n", "\n",
" if row['notes'] != '':\n", " if row['notes'] != '':\n",
" text += '\\t\\t<tr>\\n'\n", " text += '\\t\\t<tr>\\n'\n",
" text += '\\t\\t\\t<td>Notes</td>\\n'\n", " text += '\\t\\t\\t<td>Notes</td>\\n'\n",
" text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n", " text += '\\t\\t\\t<td>' + convert_link(convert_code(row['notes'])) + '</td>\\n'\n",
" text += '\\t\\t</tr>\\n'\n", " text += '\\t\\t</tr>\\n'\n",
"\n", "\n",
" if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n", " if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
@ -416,7 +435,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.7.1" "version": "3.8.5"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@ -65,7 +65,26 @@
"\n", "\n",
" pattern = '(https?://[^\\s,;\\)\"]*)'\n", " pattern = '(https?://[^\\s,;\\)\"]*)'\n",
" result = re.sub(pattern, repl, text)\n", " result = re.sub(pattern, repl, text)\n",
" return result" " return result\n",
"\n",
"# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey\n",
"def convert_code(text_with_backticks):\n",
" \"\"\"Takes all back-quoted sections in a text field and converts it to\n",
" the html tagged version of code blocks <code>...</code>\n",
" \"\"\"\n",
" return re.sub(r'`([^`]*)`', r'<code>\\1</code>', text_with_backticks)\n",
"\n",
"def convert_link(text_with_urls):\n",
" \"\"\"Takes all links in a text field and converts it to the html tagged\n",
" version of the link\n",
" \"\"\"\n",
" def _handle_matched(inputstring):\n",
" \"\"\"quick hack version of url handling on the current prime versions data\"\"\"\n",
" url = inputstring.group()\n",
" return \"<a href=\\\"{}\\\">{}</a>\".format(url, url)\n",
"\n",
" regx = \"(http[s]?://[\\w\\d:#@%/;$()~_?\\+-;=\\\\\\.&]*)(?<![\\)\\.,])\"\n",
" return re.sub(regx, _handle_matched, text_with_urls)\n"
] ]
}, },
{ {
@ -300,13 +319,13 @@
" if row['usage'] != '':\n", " if row['usage'] != '':\n",
" text += '\\t\\t<tr>\\n'\n", " text += '\\t\\t<tr>\\n'\n",
" text += '\\t\\t\\t<td>Usage</td>\\n'\n", " text += '\\t\\t\\t<td>Usage</td>\\n'\n",
" text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n", " text += '\\t\\t\\t<td>' + convert_link(convert_code(row['usage'])) + '</td>\\n'\n",
" text += '\\t\\t</tr>\\n'\n", " text += '\\t\\t</tr>\\n'\n",
"\n", "\n",
" if row['notes'] != '':\n", " if row['notes'] != '':\n",
" text += '\\t\\t<tr>\\n'\n", " text += '\\t\\t<tr>\\n'\n",
" text += '\\t\\t\\t<td>Notes</td>\\n'\n", " text += '\\t\\t\\t<td>Notes</td>\\n'\n",
" text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n", " text += '\\t\\t\\t<td>' + convert_link(convert_code(row['notes'])) + '</td>\\n'\n",
" text += '\\t\\t</tr>\\n'\n", " text += '\\t\\t</tr>\\n'\n",
"\n", "\n",
" if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n", " if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
@ -416,7 +435,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.7.1" "version": "3.8.5"
} }
}, },
"nbformat": 4, "nbformat": 4,