From bbcc074d9ad628f018cbe54f95c614fcea9f8d04 Mon Sep 17 00:00:00 2001
From: stijnvanhoey <stijnvanhoey@gmail.com>
Date: Mon, 2 Oct 2017 22:31:33 +0200
Subject: [PATCH] Add functionality that converts link to html tagged link

---
 build/build.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/build/build.py b/build/build.py
index 2f45db7..b9faa83 100644
--- a/build/build.py
+++ b/build/build.py
@@ -164,13 +164,30 @@ class DwcDigester(object):
         term_data["iri"] = term_iri
         term_data["label"] = vs_term['label']
         term_data["class"] = cf_term['organized_in']
-        term_data["definition"] = vs_term['definition']
-        term_data["comments"] = cf_term['comments']
+        term_data["definition"] = self.convert_link(vs_term['definition'])
+        term_data["comments"] = self.convert_link(cf_term['comments'])
         term_data["rdf_type"] = vs_term['rdf_type']
         namespace_url, _ = self.split_iri(term_iri)
         term_data["namespace"] = self.resolve_namespace_abbrev(namespace_url)
         return term_data
 
+    @staticmethod
+    def convert_link(text_with_urls):
+        """Wrap each http(s) URL found in the text in an HTML anchor tag and return the text.
+
+        Notes
+        ------
+        The underlying regex is not a general URL matcher and could have shortcomings...
+        A ')', '.' or ',' closing a URL is treated as punctuation and stays outside the link.
+        """
+        def _handle_matched(matched):
+            """Return the anchor markup for a single regex match of a URL."""
+            url = matched.group()
+            return "<a href=\"{}\">{}</a>".format(url, url)
+        # raw string; '-' is kept last in the class so it is a literal, not a '+' to '=' range
+        regx = r"(http[s]?://[\w\d:#@%/;$()~_?+=\\.&,-]*)(?<![).,])"
+        return re.sub(regx, _handle_matched, text_with_urls)
+
     def process_terms(self):
         """parse the config terms (sequence matters!), collect all required data from both the normative versions file and the config file and return the template ready data.