Merge pull request #375 from tdwg/hyperlink_with_comma_patch

Implement patched link/code tagging in all build scripts
2021-08-06 17:44:24 -05:00 · 2021-08-06 17:44:24 -05:00 · 076d23c48e
parent 686fb33dd8 3d82ba9f41
commit 076d23c48e
5 changed files with 73 additions and 18 deletions
--- a/build/build-termlist.py
+++ b/build/build-termlist.py
@@ -57,7 +57,7 @@ display_id = ['record_level', 'dc', 'dcterms', 'occurrence', 'organism', 'materi
 # ---------------

 # replace URL with link (function used with Audubon Core list of terms build script)
-# Does not correctly handle URLs with close parens ) characters.
+# Does not correctly handle URLs with close parens ) characters, so no longer used.
 #
 def createLinks(text):
    def repl(match):
@@ -69,15 +69,13 @@ def createLinks(text):
    result = re.sub(pattern, repl, text)
    return result

-# 2021-08-05 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey
+# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey
 def convert_code(text_with_backticks):
    """Takes all back-quoted sections in a text field and converts it to
    the html tagged version of code blocks <code>...</code>
    """
    return re.sub(r'`([^`]*)`', r'<code>\1</code>', text_with_backticks)

-# 2021-08-06 Discovered when using this with Audubon Core list of terms build script that it does not
-# correctly handle trailing commas that follow a URL. I don't understand the regex well enough to fix it
 def convert_link(text_with_urls):
    """Takes all links in a text field and converts it to the html tagged
    version of the link
@@ -87,7 +85,7 @@ def convert_link(text_with_urls):
        url = inputstring.group()
        return "<a href=\"{}\">{}</a>".format(url, url)

-    regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.])"
+    regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.,])"
    return re.sub(regx, _handle_matched, text_with_urls)

 # ---------------
--- a/build/build.py
+++ b/build/build.py
@@ -170,7 +170,7 @@ class DwcDigester(object):
            url = inputstring.group()
            return "<a href=\"{}\">{}</a>".format(url, url)

-        regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.])"
+        regx = "(http[s]?://[\w\d:#@%/;$()~_?\+-;=\\\.&]*)(?<![\)\.,])"
        return re.sub(regx, _handle_matched, text_with_urls)

    def process_terms(self):
--- a/build/doe-cv-build/build-page-simple.ipynb
+++ b/build/doe-cv-build/build-page-simple.ipynb
@@ -65,7 +65,26 @@
    "\n",
    "    pattern = '(https?://[^\\s,;\\)\"]*)'\n",
    "    result = re.sub(pattern, repl, text)\n",
-    "    return result"
+    "    return result\n",
+    "\n",
+    "# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey\n",
+    "def convert_code(text_with_backticks):\n",
+    "    \"\"\"Takes all back-quoted sections in a text field and converts it to\n",
+    "    the html tagged version of code blocks <code>...</code>\n",
+    "    \"\"\"\n",
+    "    return re.sub(r'`([^`]*)`', r'<code>\\1</code>', text_with_backticks)\n",
+    "\n",
+    "def convert_link(text_with_urls):\n",
+    "    \"\"\"Takes all links in a text field and converts it to the html tagged\n",
+    "    version of the link\n",
+    "    \"\"\"\n",
+    "    def _handle_matched(inputstring):\n",
+    "        \"\"\"quick hack version of url handling on the current prime versions data\"\"\"\n",
+    "        url = inputstring.group()\n",
+    "        return \"<a href=\\\"{}\\\">{}</a>\".format(url, url)\n",
+    "\n",
+    "    regx = \"(http[s]?://[\\w\\d:#@%/;$()~_?\\+-;=\\\\\\.&]*)(?<![\\)\\.,])\"\n",
+    "    return re.sub(regx, _handle_matched, text_with_urls)\n"
   ]
  },
  {
@@ -300,13 +319,13 @@
    "        if row['usage'] != '':\n",
    "            text += '\\t\\t<tr>\\n'\n",
    "            text += '\\t\\t\\t<td>Usage</td>\\n'\n",
-    "            text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n",
+    "            text += '\\t\\t\\t<td>' + convert_link(convert_code(row['usage'])) + '</td>\\n'\n",
    "            text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        if row['notes'] != '':\n",
    "            text += '\\t\\t<tr>\\n'\n",
    "            text += '\\t\\t\\t<td>Notes</td>\\n'\n",
-    "            text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n",
+    "            text += '\\t\\t\\t<td>' + convert_link(convert_code(row['notes'])) + '</td>\\n'\n",
    "            text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
@@ -416,7 +435,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.7.1"
+   "version": "3.8.5"
  }
 },
 "nbformat": 4,
--- a/build/em-cv-build/build-page-simple.ipynb
+++ b/build/em-cv-build/build-page-simple.ipynb
@@ -65,7 +65,26 @@
    "\n",
    "    pattern = '(https?://[^\\s,;\\)\"]*)'\n",
    "    result = re.sub(pattern, repl, text)\n",
-    "    return result"
+    "    return result\n",
+    "\n",
+    "# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey\n",
+    "def convert_code(text_with_backticks):\n",
+    "    \"\"\"Takes all back-quoted sections in a text field and converts it to\n",
+    "    the html tagged version of code blocks <code>...</code>\n",
+    "    \"\"\"\n",
+    "    return re.sub(r'`([^`]*)`', r'<code>\\1</code>', text_with_backticks)\n",
+    "\n",
+    "def convert_link(text_with_urls):\n",
+    "    \"\"\"Takes all links in a text field and converts it to the html tagged\n",
+    "    version of the link\n",
+    "    \"\"\"\n",
+    "    def _handle_matched(inputstring):\n",
+    "        \"\"\"quick hack version of url handling on the current prime versions data\"\"\"\n",
+    "        url = inputstring.group()\n",
+    "        return \"<a href=\\\"{}\\\">{}</a>\".format(url, url)\n",
+    "\n",
+    "    regx = \"(http[s]?://[\\w\\d:#@%/;$()~_?\\+-;=\\\\\\.&]*)(?<![\\)\\.,])\"\n",
+    "    return re.sub(regx, _handle_matched, text_with_urls)\n"
   ]
  },
  {
@@ -300,13 +319,13 @@
    "        if row['usage'] != '':\n",
    "            text += '\\t\\t<tr>\\n'\n",
    "            text += '\\t\\t\\t<td>Usage</td>\\n'\n",
-    "            text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n",
+    "            text += '\\t\\t\\t<td>' + convert_link(convert_code(row['usage'])) + '</td>\\n'\n",
    "            text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        if row['notes'] != '':\n",
    "            text += '\\t\\t<tr>\\n'\n",
    "            text += '\\t\\t\\t<td>Notes</td>\\n'\n",
-    "            text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n",
+    "            text += '\\t\\t\\t<td>' + convert_link(convert_code(row['notes'])) + '</td>\\n'\n",
    "            text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
@@ -416,7 +435,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.7.1"
+   "version": "3.8.5"
  }
 },
 "nbformat": 4,
--- a/build/pw-cv-build/build-page-simple.ipynb
+++ b/build/pw-cv-build/build-page-simple.ipynb
@@ -65,7 +65,26 @@
    "\n",
    "    pattern = '(https?://[^\\s,;\\)\"]*)'\n",
    "    result = re.sub(pattern, repl, text)\n",
-    "    return result"
+    "    return result\n",
+    "\n",
+    "# 2021-08-06 Replace the createLinks() function with functions copied from the QRG build script written by S. Van Hoey\n",
+    "def convert_code(text_with_backticks):\n",
+    "    \"\"\"Takes all back-quoted sections in a text field and converts it to\n",
+    "    the html tagged version of code blocks <code>...</code>\n",
+    "    \"\"\"\n",
+    "    return re.sub(r'`([^`]*)`', r'<code>\\1</code>', text_with_backticks)\n",
+    "\n",
+    "def convert_link(text_with_urls):\n",
+    "    \"\"\"Takes all links in a text field and converts it to the html tagged\n",
+    "    version of the link\n",
+    "    \"\"\"\n",
+    "    def _handle_matched(inputstring):\n",
+    "        \"\"\"quick hack version of url handling on the current prime versions data\"\"\"\n",
+    "        url = inputstring.group()\n",
+    "        return \"<a href=\\\"{}\\\">{}</a>\".format(url, url)\n",
+    "\n",
+    "    regx = \"(http[s]?://[\\w\\d:#@%/;$()~_?\\+-;=\\\\\\.&]*)(?<![\\)\\.,])\"\n",
+    "    return re.sub(regx, _handle_matched, text_with_urls)\n"
   ]
  },
  {
@@ -300,13 +319,13 @@
    "        if row['usage'] != '':\n",
    "            text += '\\t\\t<tr>\\n'\n",
    "            text += '\\t\\t\\t<td>Usage</td>\\n'\n",
-    "            text += '\\t\\t\\t<td>' + createLinks(row['usage']) + '</td>\\n'\n",
+    "            text += '\\t\\t\\t<td>' + convert_link(convert_code(row['usage'])) + '</td>\\n'\n",
    "            text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        if row['notes'] != '':\n",
    "            text += '\\t\\t<tr>\\n'\n",
    "            text += '\\t\\t\\t<td>Notes</td>\\n'\n",
-    "            text += '\\t\\t\\t<td>' + createLinks(row['notes']) + '</td>\\n'\n",
+    "            text += '\\t\\t\\t<td>' + convert_link(convert_code(row['notes'])) + '</td>\\n'\n",
    "            text += '\\t\\t</tr>\\n'\n",
    "\n",
    "        if (vocab_type == 2 or vocab_type == 3) and row['controlled_value_string'] != '': # controlled vocabulary\n",
@@ -416,7 +435,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.7.1"
+   "version": "3.8.5"
  }
 },
 "nbformat": 4,