mirror of https://github.com/tdwg/dwc.git
Merge pull request #375 from tdwg/hyperlink_with_comma_patch
Implement patched link/code tagging in all build scripts
This commit is contained in:
commit
076d23c48e
|
@ -57,7 +57,7 @@ display_id = ['record_level', 'dc', 'dcterms', 'occurrence', 'organism', 'materi
|
|||
# ---------------
|
||||
|
||||
# replace URL with link (function used with Audubon Core list of terms build script)
|
||||
# Does not correctly handle URLs with close parens ) characters.
|
||||
# Does not correctly handle URLs with close parens ) characters, so no longer used.
|
||||
#
|
||||
def createLinks(text):
|
||||
def repl(match):
|
||||
|
@ -69,15 +69,13 @@ def createLinks(text):
|
|||
result = re.sub(pattern, repl, text)
|
||||
return result
|
||||
|
||||
# 2021-08-05 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey
|
||||
# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey
|
||||
def convert_code(text_with_backticks):
|
||||
"""Takes all back-quoted sections in a text field and converts it to
|
||||
the html tagged version of code blocks <code>...</code>
|
||||
"""
|
||||
return re.sub(r'`([^`]*)`', r'<code>\1</code>', text_with_backticks)
|
||||
|
||||
# 2021-08-06 Discovered when using this with Audubon Core list of terms build script that it does not
|
||||
# correctly handle trailing commas that follow a URL. I don't understand the regex well enough to fix it
|
||||
def convert_link(text_with_urls):
|
||||
"""Takes all links in a text field and converts it to the html tagged
|
||||
version of the link
|
||||
|
@ -87,7 +85,7 @@ def convert_link(text_with_urls):
|
|||
url = inputstring.group()
|
||||
return "<a href=\"{}\">{}</a>".format(url, url)
|
||||
|
||||
regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.])"
|
||||
regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.,])"
|
||||
return re.sub(regx, _handle_matched, text_with_urls)
|
||||
|
||||
# ---------------
|
||||
|
|
|
@ -170,7 +170,7 @@ class DwcDigester(object):
|
|||
url = inputstring.group()
|
||||
return "<a href=\"{}\">{}</a>".format(url, url)
|
||||
|
||||
regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.])"
|
||||
regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.,])"
|
||||
return re.sub(regx, _handle_matched, text_with_urls)
|
||||
|
||||
def process_terms(self):
|
||||
|
|
|
@ -65,7 +65,26 @@
|
|||
"\n",
|
||||
" pattern = '(https?://[^\\s,;\\)\"]*)'\n",
|
||||
" result = re.sub(pattern, repl, text)\n",
|
||||
" return result"
|
||||
" return result\n",
|
||||
"\n",
|
||||
"# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey\n",
|
||||
"def convert_code(text_with_backticks):\n",
|
||||
" \"\"\"Takes all back-quoted sections in a text field and converts it to\n",
|
||||
" the html tagged version of code blocks <code>...</code>\n",
|
||||
" \"\"\"\n",
|
||||
" return re.sub(r'`([^`]*)`', r'<code>\\1</code>', text_with_backticks)\n",
|
||||
"\n",
|
||||
"def convert_link(text_with_urls):\n",
|
||||
" \"\"\"Takes all links in a text field and converts it to the html tagged\n",
|
||||
" version of the link\n",
|
||||
" \"\"\"\n",
|
||||
" def _handle_matched(inputstring):\n",
|
||||
" \"\"\"quick hack version of url handling on the current prime versions data\"\"\"\n",
|
||||
" url = inputstring.group()\n",
|
||||
" return \"<a href=\\\"{}\\\">{}</a>\".format(url, url)\n",
|
||||
"\n",
|
||||
" regx = \"(http[s]?://[\\w\\d:#@%/;$()~_?\\+-;=\\\\\\.&]*)(?<![\\)\\.,])\"\n",
|
||||
" return re.sub(regx, _handle_matched, text_with_urls)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -300,13 +319,13 @@
|
|||
" if row['usage'] != '':\n",
|
||||
" text += '\\t\\t<tr>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>Usage</td>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>' + convert_link(convert_code(row['usage'])) + '</td>\\n'\n",
|
||||
" text += '\\t\\t</tr>\\n'\n",
|
||||
"\n",
|
||||
" if row['notes'] != '':\n",
|
||||
" text += '\\t\\t<tr>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>Notes</td>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>' + convert_link(convert_code(row['notes'])) + '</td>\\n'\n",
|
||||
" text += '\\t\\t</tr>\\n'\n",
|
||||
"\n",
|
||||
" if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
|
||||
|
@ -416,7 +435,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.1"
|
||||
"version": "3.8.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -65,7 +65,26 @@
|
|||
"\n",
|
||||
" pattern = '(https?://[^\\s,;\\)\"]*)'\n",
|
||||
" result = re.sub(pattern, repl, text)\n",
|
||||
" return result"
|
||||
" return result\n",
|
||||
"\n",
|
||||
"# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey\n",
|
||||
"def convert_code(text_with_backticks):\n",
|
||||
" \"\"\"Takes all back-quoted sections in a text field and converts it to\n",
|
||||
" the html tagged version of code blocks <code>...</code>\n",
|
||||
" \"\"\"\n",
|
||||
" return re.sub(r'`([^`]*)`', r'<code>\\1</code>', text_with_backticks)\n",
|
||||
"\n",
|
||||
"def convert_link(text_with_urls):\n",
|
||||
" \"\"\"Takes all links in a text field and converts it to the html tagged\n",
|
||||
" version of the link\n",
|
||||
" \"\"\"\n",
|
||||
" def _handle_matched(inputstring):\n",
|
||||
" \"\"\"quick hack version of url handling on the current prime versions data\"\"\"\n",
|
||||
" url = inputstring.group()\n",
|
||||
" return \"<a href=\\\"{}\\\">{}</a>\".format(url, url)\n",
|
||||
"\n",
|
||||
" regx = \"(http[s]?://[\\w\\d:#@%/;$()~_?\\+-;=\\\\\\.&]*)(?<![\\)\\.,])\"\n",
|
||||
" return re.sub(regx, _handle_matched, text_with_urls)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -300,13 +319,13 @@
|
|||
" if row['usage'] != '':\n",
|
||||
" text += '\\t\\t<tr>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>Usage</td>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>' + convert_link(convert_code(row['usage'])) + '</td>\\n'\n",
|
||||
" text += '\\t\\t</tr>\\n'\n",
|
||||
"\n",
|
||||
" if row['notes'] != '':\n",
|
||||
" text += '\\t\\t<tr>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>Notes</td>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>' + convert_link(convert_code(row['notes'])) + '</td>\\n'\n",
|
||||
" text += '\\t\\t</tr>\\n'\n",
|
||||
"\n",
|
||||
" if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
|
||||
|
@ -416,7 +435,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.1"
|
||||
"version": "3.8.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -65,7 +65,26 @@
|
|||
"\n",
|
||||
" pattern = '(https?://[^\\s,;\\)\"]*)'\n",
|
||||
" result = re.sub(pattern, repl, text)\n",
|
||||
" return result"
|
||||
" return result\n",
|
||||
"\n",
|
||||
"# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey\n",
|
||||
"def convert_code(text_with_backticks):\n",
|
||||
" \"\"\"Takes all back-quoted sections in a text field and converts it to\n",
|
||||
" the html tagged version of code blocks <code>...</code>\n",
|
||||
" \"\"\"\n",
|
||||
" return re.sub(r'`([^`]*)`', r'<code>\\1</code>', text_with_backticks)\n",
|
||||
"\n",
|
||||
"def convert_link(text_with_urls):\n",
|
||||
" \"\"\"Takes all links in a text field and converts it to the html tagged\n",
|
||||
" version of the link\n",
|
||||
" \"\"\"\n",
|
||||
" def _handle_matched(inputstring):\n",
|
||||
" \"\"\"quick hack version of url handling on the current prime versions data\"\"\"\n",
|
||||
" url = inputstring.group()\n",
|
||||
" return \"<a href=\\\"{}\\\">{}</a>\".format(url, url)\n",
|
||||
"\n",
|
||||
" regx = \"(http[s]?://[\\w\\d:#@%/;$()~_?\\+-;=\\\\\\.&]*)(?<![\\)\\.,])\"\n",
|
||||
" return re.sub(regx, _handle_matched, text_with_urls)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -300,13 +319,13 @@
|
|||
" if row['usage'] != '':\n",
|
||||
" text += '\\t\\t<tr>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>Usage</td>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>' + convert_link(convert_code(row['usage'])) + '</td>\\n'\n",
|
||||
" text += '\\t\\t</tr>\\n'\n",
|
||||
"\n",
|
||||
" if row['notes'] != '':\n",
|
||||
" text += '\\t\\t<tr>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>Notes</td>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n",
|
||||
" text += '\\t\\t\\t<td>' + convert_link(convert_code(row['notes'])) + '</td>\\n'\n",
|
||||
" text += '\\t\\t</tr>\\n'\n",
|
||||
"\n",
|
||||
" if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
|
||||
|
@ -416,7 +435,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.1"
|
||||
"version": "3.8.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
Loading…
Reference in New Issue