Revised attempt to escape quotes in content, using xml.sax.saxutils.escape. Results for existing configs are valid XML.

This commit is contained in:
tucotuco 2023-07-09 20:26:08 -03:00
parent e879849ce5
commit a2dc19afe7
1 changed file with 12 additions and 7 deletions

View File

@@ -11,11 +11,12 @@
# One extension can be generated per run of the script, with the extension's name and # One extension can be generated per run of the script, with the extension's name and
# destination file as parameters (see main() for syntax). # destination file as parameters (see main() for syntax).
# #
__version__ = '2023-07-09T12:46-03:00' __version__ = '2023-07-09T20:21-03:00'
import csv import csv
import sys import sys
import argparse import argparse
from xml.sax.saxutils import escape
class CSVtoXMLConverter: class CSVtoXMLConverter:
''' '''
@@ -566,13 +567,17 @@ class CSVtoXMLConverter:
xml += ' xmlns:dc="http://purl.org/dc/terms/"\n' xml += ' xmlns:dc="http://purl.org/dc/terms/"\n'
xml += ' xsi:schemaLocation="http://rs.gbif.org/extension/ http://rs.gbif.org/schema/extension.xsd"\n' xml += ' xsi:schemaLocation="http://rs.gbif.org/extension/ http://rs.gbif.org/schema/extension.xsd"\n'
xml += f' dc:title="{extension.get("title")}"\n' xml += f' dc:title="{extension.get("title")}"\n'
xml += f' name="{extension_name}" namespace="{extension.get("namespace")}" rowType="{extension.get("rowType")}"\n' xml += f' name="{extension_name}"\n'
xml += f' namespace="{extension.get("namespace")}"\n'
xml += f' rowType="{extension.get("rowType")}"\n'
xml += f' dc:issued="{extension.get("dc:issued")}"\n' xml += f' dc:issued="{extension.get("dc:issued")}"\n'
subject = extension.get("dc:subject") subject = extension.get("dc:subject")
if subject is not None: if subject is not None:
xml += f' dc:subject="{extension.get("dc:subject")}"\n' xml += f' dc:subject="{extension.get("dc:subject")}"\n'
xml += f' dc:relation="{extension.get("dc:relation")}"\n' xml += f' dc:relation="{extension.get("dc:relation")}"\n'
xml += f' dc:description="{extension.get("dc:description")}">\n' description = extension.get("dc:description")
description = escape(description, {'"':'"'})
xml += f' dc:description="{description}">\n'
xml += '\n' xml += '\n'
with open(self.csv_file_path, 'r') as csv_file: with open(self.csv_file_path, 'r') as csv_file:
reader = csv.reader(csv_file) reader = csv.reader(csv_file)
@@ -603,8 +608,10 @@ class CSVtoXMLConverter:
description = row_dict["definition"] description = row_dict["definition"]
if row_dict.get("comments") is not None and len(row_dict.get("comments"))>0: if row_dict.get("comments") is not None and len(row_dict.get("comments"))>0:
description += f' {row_dict["comments"]}' description += f' {row_dict["comments"]}'
description = escape(description, {'"':'"'})
term_xml += f'dc:description="{description}" ' term_xml += f'dc:description="{description}" '
examples = row_dict.get("examples") or "" examples = row_dict.get("examples") or ""
examples = escape(examples, {'"':'"'})
term_xml += f'examples="{examples}" ' term_xml += f'examples="{examples}" '
if row_dict["term_localName"] in extension.get("required"): if row_dict["term_localName"] in extension.get("required"):
term_xml += f'required="true"/>' term_xml += f'required="true"/>'
@@ -612,9 +619,10 @@ class CSVtoXMLConverter:
term_xml += f'required="false"/>' term_xml += f'required="false"/>'
xml += f' {term_xml}\n' xml += f' {term_xml}\n'
for addition in extension.get("gbif_additions"): for addition in extension.get("gbif_additions"):
addition = escape(addition,{'"':'"'})
xml += f' {addition}' xml += f' {addition}'
xml += "</extension>" xml += "</extension>"
return encoded_quotes(xml) return xml
def write_xml(self, extension_name, filename): def write_xml(self, extension_name, filename):
''' '''
@@ -624,9 +632,6 @@ class CSVtoXMLConverter:
with open(filename, 'w') as xml_file: with open(filename, 'w') as xml_file:
xml_file.write(self.get_xml(extension_name)) xml_file.write(self.get_xml(extension_name))
def encoded_quotes(s):
return s.replace('"', '&quot;')
def _getoptions(): def _getoptions():
''' Parse command line options and return them.''' ''' Parse command line options and return them.'''
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()