mirror of https://github.com/tdwg/dwc.git
116 lines
4.7 KiB
Python
116 lines
4.7 KiB
Python
# Script to turn the current document (index.md) into a dated previous-version document
|
|
# This program is released under a GNU General Public License v3.0 http://www.gnu.org/licenses/gpl-3.0
|
|
# Author: Steve Baskauf
|
|
|
|
script_version = '0.1.0'
|
|
version_modified = '2023-09-17'
|
|
|
|
# NOTE: This script should be run only after the script updating the machine-readable metadata has been run.
|
|
# It must be run before the script that generates the new document version.
|
|
|
|
import requests
|
|
import pandas as pd
|
|
import yaml
|
|
import os
|
|
import sys
|
|
|
|
# -----------------
|
|
# Command line arguments
|
|
# -----------------
|
|
|
|
arg_vals = sys.argv[1:]


def _option_value(option):
    """Return the value supplied for a command-line option, or None if the option is absent.

    The value is the token that immediately follows `option` in `arg_vals`.
    Looking it up by position (rather than pairing separately filtered lists of
    options and values by index) keeps each option tied to its own value even when
    other flags or stray positional tokens are present.

    Exits with status 1 if the option is present but is not followed by a value
    (end of the command line, or the next token starts with '-').
    """
    if option not in arg_vals:
        return None
    value_position = arg_vals.index(option) + 1
    if value_position >= len(arg_vals) or arg_vals[value_position].startswith('-'):
        print('Option ' + option + ' must be followed by a value')
        sys.exit(1)
    return arg_vals[value_position]


# Name of the last part of the URL of the doc
document_slug = _option_value('--slug')
if document_slug is None:
    print('Must specify URL slug for document using --slug option')
    # Non-zero status so that a calling shell script can detect the failure.
    sys.exit(1)

# Used as the directory name
directory_name = _option_value('--dir')
if directory_name is None:
    print('Must specify name of directory containing template and configs using --dir option')
    sys.exit(1)

# "master" for production, something else for development
github_branch = _option_value('--branch')
if github_branch is None:
    github_branch = 'master'
|
|
|
|
|
|
# -----------------
|
|
# Configuration section
|
|
# -----------------
|
|
|
|
# Base URL for raw files in the rs.tdwg.org repository on the selected branch.
githubBaseUri = 'https://raw.githubusercontent.com/tdwg/rs.tdwg.org/' + github_branch + '/'

# Location of this document's configuration file within that repository.
config_file_path = 'process/document_metadata_processing/' + directory_name + '/'
document_configuration_yaml_file = 'document_configuration.yaml'

# Local directory holding the document; the relative path implies the script is run from the build directory.
path_of_doc_relative_to_build_dir = '../docs/' + document_slug + '/'

# Load the document configuration YAML file from its GitHub URL
document_configuration_yaml_url = githubBaseUri + config_file_path + document_configuration_yaml_file
# A timeout keeps the script from hanging indefinitely if GitHub does not respond.
config_response = requests.get(document_configuration_yaml_url, timeout=30)
# Stop here on an HTTP error (e.g. a wrong --dir or --branch value); otherwise the error page
# would be parsed as if it were the configuration and fail later with an unrelated error.
config_response.raise_for_status()
# NOTE(review): yaml.FullLoader is applied to downloaded content; yaml.safe_load is the safer
# choice if the configuration only uses plain YAML types -- confirm before switching.
document_configuration_yaml = yaml.load(config_response.text, Loader=yaml.FullLoader)
|
|
|
|
# Determine the version IRI of the most recent version of the document (used for the
# "Replaced by" link) and the issue date of the version before it (used as the new file name
# of the document that is being turned into the previous document).

# Load versions list from document versions data in the rs.tdwg.org repo and find most recent version.
versions_data_url = githubBaseUri + 'docs-versions/docs-versions.csv'
# na_filter=False keeps empty cells as empty strings rather than NaN.
versions_list_df = pd.read_csv(versions_data_url, na_filter=False)

# Slice all rows for versions of this document.
matching_versions = versions_list_df[versions_list_df['current_iri'] == document_configuration_yaml['current_iri']]
# Sort the matching versions by version IRI in descending order so that the most recent version is first.
# NOTE(review): this is a string sort; it presumes version IRIs sort chronologically -- confirm.
matching_versions = matching_versions.sort_values(by=['version_iri'], ascending=[False])

# Check for the error condition of there being no matching versions.
if matching_versions.empty:
    print('There are no versions of this document. Did you run the script to update the document metadata?')
    # This is an error, so report it through a non-zero exit status.
    sys.exit(1)

# If there is only one row in the matching_versions dataframe (only one version), then the rest of the script should not be run.
if len(matching_versions.index) == 1:
    print('There is only one version of this document. No changes are being made to the documents.')
    # Nothing to do is not an error, so the exit status stays 0.
    sys.exit(0)

# The most recent version is the first row in the dataframe (row 0).
# .iat is positional, so the column name has to be converted to a column position first.
version_iri_column_index = matching_versions.columns.get_loc('version_iri')
most_recent_version_iri = matching_versions.iat[0, version_iri_column_index]
print(most_recent_version_iri)

# Find the date of the previous version, which is in the second row of the dataframe (row 1).
version_issued_column_index = matching_versions.columns.get_loc('version_issued')
previous_version_date = matching_versions.iat[1, version_issued_column_index]
print(previous_version_date)
|
|
|
|
# The document to be converted is named "index.md". Its name must be changed to the date of the previous version.
current_document_path = path_of_doc_relative_to_build_dir + 'index.md'
previous_document_path = path_of_doc_relative_to_build_dir + previous_version_date + '.md'
os.rename(current_document_path, previous_document_path)

# Open the renamed file and read its text.
# The encoding is given explicitly so the result does not depend on the platform's locale.
# NOTE(review): assumes the Markdown documents are UTF-8 encoded -- confirm.
with open(previous_document_path, 'rt', encoding='utf-8') as file_object:
    file_text = file_object.read()

# Build the "Replaced by" definition-list entry that points at the most recent version.
# It ends with a blank line so that it is separated from the entry that follows it.
replacement_version_metadata_string = 'Replaced by\n: <' + most_recent_version_iri + '>\n\n'

# Insert the replacement version information into the header above the Abstract section.
abstract_marker = 'Abstract\n:'
if abstract_marker not in file_text:
    # The file has already been renamed at this point; make the missing header visible
    # instead of silently writing the text back unchanged.
    print('Warning: no Abstract entry found in ' + previous_document_path + '; "Replaced by" was not inserted.')
# Only the first occurrence is replaced, so a later "Abstract" entry in the document body is left alone.
updated_file_text = file_text.replace(abstract_marker, replacement_version_metadata_string + abstract_marker, 1)

# Write the updated file text to the file.
with open(previous_document_path, 'wt', encoding='utf-8') as file_object:
    file_object.write(updated_file_text)
|