# Script to insert header metadata into a document template # This program is released under a GNU General Public License v3.0 http://www.gnu.org/licenses/gpl-3.0 # Author: Steve Baskauf script_version = '0.1.0' version_modified = '2023-09-17' # NOTE: This script should be run after the script that moves the previous document to a version. import requests import pandas as pd import yaml import sys # ----------------- # Command line arguments # ----------------- arg_vals = sys.argv[1:] opts = [opt for opt in arg_vals if opt.startswith('-')] args = [arg for arg in arg_vals if not arg.startswith('-')] # Name of the last part of the URL of the doc if '--slug' in opts: document_slug = args[opts.index('--slug')] else: print('Must specify URL slug for document using --slug option') exit() # Used as the directory name if '--dir' in opts: directory_name = args[opts.index('--dir')] else: print('Must specify name of directory containing template and configs using --dir option') exit() # "master" for production, something else for development if '--branch' in opts: github_branch = args[opts.index('--branch')] else: github_branch = 'master' # ----------------- # Configuration section # ----------------- # This is the base URL for raw files from the branch of the repo that has been pushed to GitHub github_base_url = 'https://raw.githubusercontent.com/tdwg/rs.tdwg.org/' + github_branch + '/' document_template_name = 'index_template.md' # This is the input file into which the header metadata will be inserted. out_filename = '../docs/' + document_slug + '/index.md' # This is where the document rendered by GitHub Pages will be created. config_file_path = 'process/document_metadata_processing/' + directory_name + '/' contributors_yaml_file = 'authors_configuration.yaml' document_configuration_yaml_file = 'document_configuration.yaml' # Load the contributors YAML file from its GitHub URL contributors_yaml_url = github_base_url + config_file_path + contributors_yaml_file contributors_yaml = requests.get(contributors_yaml_url).text if contributors_yaml == '404: Not Found': print('Contributors YAML file not found. Check the URL.') print(contributors_yaml_url) exit() contributors_yaml = yaml.load(contributors_yaml, Loader=yaml.FullLoader) # Load the document configuration YAML file from its GitHub URL document_configuration_yaml_url = github_base_url + config_file_path + document_configuration_yaml_file document_configuration_yaml = requests.get(document_configuration_yaml_url).text document_configuration_yaml = yaml.load(document_configuration_yaml, Loader=yaml.FullLoader) # ----------------- # Main script # ----------------- # Because this is a hack of the build-termlist.py script, "header" is used in variable names, although in this case # it is the entire document template, not just the header. headerObject = open(directory_name + '/' + document_template_name, 'rt', encoding='utf-8') header = headerObject.read() headerObject.close() # Build the Markdown for the contributors list contributors = '' for contributor in contributors_yaml: contributors += '[' + contributor['contributor_literal'] + '](' + contributor['contributor_iri'] + ') ' contributors += '([' + contributor['affiliation'] + '](' + contributor['affiliation_uri'] + ')), ' contributors = contributors[:-2] # Remove the last comma and space # Substitute values of ratification_date and contributors into the header template header = header.replace('{document_title}', document_configuration_yaml['documentTitle']) header = header.replace('{ratification_date}', document_configuration_yaml['doc_modified']) header = header.replace('{created_date}', document_configuration_yaml['doc_created']) header = header.replace('{contributors}', contributors) header = header.replace('{standard_iri}', document_configuration_yaml['dcterms_isPartOf']) header = header.replace('{current_iri}', document_configuration_yaml['current_iri']) header = header.replace('{abstract}', document_configuration_yaml['abstract']) header = header.replace('{creator}', document_configuration_yaml['creator']) header = header.replace('{publisher}', document_configuration_yaml['publisher']) year = document_configuration_yaml['doc_modified'].split('-')[0] header = header.replace('{year}', year) # Determine whether there was a previous version of the document. if document_configuration_yaml['doc_created'] != document_configuration_yaml['doc_modified']: # Load versions list from document versions data in the rs.tdwg.org repo and find most recent version. versions_data_url = github_base_url + 'docs/docs-versions.csv' versions_list_df = pd.read_csv(versions_data_url, na_filter=False) # Slice all rows for versions of this document. matching_versions = versions_list_df[versions_list_df['current_iri']==document_configuration_yaml['current_iri']] # Sort the matching versions by version IRI in descending order so that the most recent version is first. matching_versions = matching_versions.sort_values(by=['version_iri'], ascending=[False]) # The previous version is the second row in the dataframe (row 1). # The version IRI is in the second column (column 1). most_recent_version_iri = matching_versions.iat[1, 1] #print(most_recent_version_iri) # Insert the previous version information into the header previous_version_metadata_string = '''Previous version : <''' + most_recent_version_iri + '''> ''' # Insert the previous version information into the designated slot. header = header.replace('{previous_version_slot}\n\n', previous_version_metadata_string) else: # If there was no previous version, remove the slot from the header. header = header.replace('{previous_version_slot}\n\n', '') outputObject = open(out_filename, 'wt', encoding='utf-8') outputObject.write(header) outputObject.close() print('done')