From b456fb5dd4a305e0d243ff2f82fda730a894122e Mon Sep 17 00:00:00 2001 From: RosalynHatcher Date: Wed, 21 Aug 2019 08:24:41 +0100 Subject: [PATCH] Remove cf-checker wrapper script. Has not been updated for python3 and the caching of the standard tables is now incorporated into the main checker. --- README.md | 12 --- src/cf-checker | 285 ------------------------------------------------- 2 files changed, 297 deletions(-) delete mode 100755 src/cf-checker diff --git a/README.md b/README.md index 0f009b7..2cc4b04 100644 --- a/README.md +++ b/README.md @@ -57,18 +57,6 @@ The following parameters can be set on the command-line or through environment v 2. `CF_AREA_TYPES` or (CL option `-a`) : The path or URL to the CF area types table 3. `CF_REGION_NAMES` or (CL option `-r`): The path or URL to the CF region names table -### Wrapper script - -Note: The wrapper script has not been tested under Python 3. The cf-checker itself now has the option to cache the standard_name, area_types and region_name tables. - -A wrapper to cfchecks, called `cf-checker`, is provided in the `src/` directory, which will maintain local copies of the standard names table and the area types table, and will refresh these local copies only if the age of the file (based on its modification time) is more than a specified maximum, defaulting to 1 day. This allows for running the checker repeatedly without refetching the tables on each invocation, while still keeping them reasonably up to date. - -For a usage message, type `cf-checker -h` - -Note that the wrapper defaults to storing the downloaded files in `/var/spool/cf-checker`, so if the script is used unmodified then this directory should be created or else an alternative value should be passed as a command line option (`-d`). Ensure either that all users have write permission to the directory used, or else that a user that does have write permission runs a cron job to refresh the tables. For the latter purpose, it is permissible to run the wrapper without specifying any data files to check, in which it will do no more than update the tables; this is still conditional on age, so for this purpose it is recommended to run the wrapper with a maximum age of zero (`-t 0`), and to run the cron job at intervals not exceeding the -default maximum age. - -The wrapper is maintained by CEDA and not by NCAS CMS. ### Running the Test script diff --git a/src/cf-checker b/src/cf-checker deleted file mode 100755 index aed3cd6..0000000 --- a/src/cf-checker +++ /dev/null @@ -1,285 +0,0 @@ -#!/usr/bin/env python - -import sys -import os -import urllib2 -import time -import string -import subprocess -import getopt - -class CfchecksWrapper(object): - - """ - cf-checker [options] file1 [file2...] - cf-checker [options] --update_only - cf-checker -h|--help - - Description: - A wrapper to the CF checker, which will keep local copies of the area - types and standard name tables, and fetch fresh copies as required if - the local copies are too old. - - If --update_only is specified, then it will update the area - types and standard name tables without checking any data files. - In this case the tables are updated regardless of their timestamp - (though this can be overridden with the -t flag - see below). - - Other options: - - -a|--area_types : - location of the CF area types table - - -s|--cf_standard_names : - location of the CF standard name table - - -u|--udunits : - location of the udunits2.xml file - - -v|--version : - CF version to check against - - -t|--max_age - maximum allowable age of the tables before downloading - a fresh copy (floating point number permitted). - The default is 1 day when checking data files, - and is 0 if --update_only is specified. - - -d|--spool_dir : - directory to use for local copies of the area-types - and standard-name tables - default = /var/spool/cf-checker - - --verbose: - increase verbosity - - -h|--help: - prints this help text and the help text for the - underlying checker - - Note that the datafiles to check are optional: the wrapper can be run - without them to just download the table files as necessary. - - On a multi-user system, it may be desirable to remove world write - permission from the spool directory and to set up a cron job to - run this wrapper with --update_only as a user that has write - access to it. -""" - - def __init__(self, - spool_dir = '/var/spool/cf-checker', - standard_names_key = 'CF_STANDARD_NAMES', - standard_names_url = 'http://cfconventions.org/Data/cf-standard-names/current/src/cf-standard-name-table.xml', - area_types_key = 'CF_AREA_TYPES', - area_types_url = 'http://cfconventions.org/Data/area-type-table/current/src/area-type-table.xml', - udunits_key = 'UDUNITS', - udunits_path = '/usr/share/udunits/udunits2.xml', - checker_command = 'cfchecks', - default_max_table_age = 1., - verbose = False): - - self.spool_dir = spool_dir - self.standard_names_key = standard_names_key - self.standard_names_url = standard_names_url - self.area_types_key = area_types_key - self.area_types_url = area_types_url - self.default_max_table_age = default_max_table_age - self.max_table_age = None - self.udunits_key = udunits_key, - self.udunits_path = udunits_path - self.checker_command = checker_command - self.verbose = verbose - self.cf_version = None - - def main(self): - "Download table files as required and run the checker" - - # allow values to be passed in environment (as also understood directly by - # the wrapper) but for these to be overridden on the command line - exit_value = 0 - self.set_values_from_env() - self.parse_args(sys.argv[1:]) - try: - self.refresh_tables() - except (IOError, OSError) as err: - print("Warning: error when trying to refresh local copies of xml tables: %s: %s" - % (err.filename, err.strerror)) - self.verbose = True - exit_value = 1 - if self.files_to_check: - print("\nTrying to run checker anyway...") - for data_file in self.files_to_check: - status = self.run_checker(data_file, cf_version = self.cf_version) - if status != 0: - exit_value = 1 - sys.exit(exit_value) - - def parse_args(self, cmd_args): - """Parse values from the command line (and environment). - """ - try: - (opts, args) = getopt.getopt( - cmd_args, - 'a:s:u:v:t:d:h', - ['area_types=', 'cf_standard_names=', 'udunits=', - 'version=', 'max_age=', 'spool_dir=', "verbose", - 'update_only']) - except getopt.GetoptError: - self.usage(status = 1) - - update_only = False - for arg, val in opts: - if arg in ("-a", "--area_types"): - self.area_types_url = val - elif arg in ("-s", "--cf_standard_names"): - self.standard_names_url = val - elif arg in ("-u", "--udunits"): - self.udunits_path = val - elif arg in ("-v", "--version"): - self.cf_version = val - elif arg in ("-t", "--max_age"): - try: - self.max_table_age = float(val) - except ValueError: - self.usage(status = 1) - elif arg in ("-d", "--spool_dir"): - self.spool_dir = val - elif arg in ("-h", "--help"): - self.usage(show_checker_usage=True, status=0) - elif arg in ("--verbose"): - self.verbose = True - elif arg in ("--update_only"): - update_only = True - else: - print("unrecognised option %s (coding bug?)" % arg) - - if self.max_table_age == None: - if update_only: - self.max_table_age = 0 - else: - self.max_table_age = self.default_max_table_age - - self.files_to_check = args - if not update_only and not self.files_to_check: - print("Error: no files to check and --update_only not given run with --help (or -h) " - "for a full usage message") - sys.exit(1) - - self.set_table_paths() - - def set_values_from_env(self): - for env_key, self_key in ((self.standard_names_key, 'standard_names_url'), - (self.area_types_key, 'area_types_url'), - (self.udunits_key, 'udunits_path')): - try: - val = os.environ[env_key] - except KeyError: - continue - setattr(self, self_key, val) - - def set_table_paths(self): - "Set up the local paths of the tables that are downloaded" - self.standard_names_path = self.get_local_path(self.standard_names_url) - self.area_types_path = self.get_local_path(self.area_types_url) - - def get_local_path(self, url): - "Return path of locally downloaded copy of a URL" - return os.path.join(self.spool_dir, os.path.basename(url)) - - def usage(self, status=None, show_checker_usage=False): - "Print a usage message, and exit if status passed" - print(self.__doc__) - if show_checker_usage: - print("\nUsage message from underlying checker (%s) follows:" % self.checker_command) - print("-----------------------------------------------------\n") - self.run_command([self.checker_command, "-h"]) - if status != None: - sys.exit(status) - - def refresh_tables(self): - "Download the standard names and area type tables as required" - for url, local_path in ((self.standard_names_url, self.standard_names_path), - (self.area_types_url, self.area_types_path)): - if self.is_remote(url) and not self.recent_copy_exists(local_path): - self.fetch(url, local_path) - - def is_remote(self, url): - "Return True unless url is a file: URL (or just a local path)" - return not (url.startswith("file:") or url.startswith("/") or ":" not in url) - - def recent_copy_exists(self, local_path): - "return True if the path exists and has age less than max allowed age" - if not os.path.exists(local_path): - return False - age = self.file_age(local_path) - if self.verbose: - print("age of %s is %s days" % (local_path, age)) - if age > self.max_table_age: - return False - if age < 0: - # a future mtime does *not* constitute a recent copy - it means - # something has gone wrong, so we say no - return False - return True - - def file_age(self, path): - "return age of file in days (based on mtime)" - try: - return (time.time() - os.stat(path).st_mtime) / 86400. - except OSError: - return None - - def fetch(self, url, local_path, tmp_path=None): - "Fetch URL to the specified local path (via tmp file for atomicity)" - tmp_path = tmp_path or ("%s.tmp" % local_path) - if self.verbose: - print("downloading %s to %s" % (url, tmp_path)) - self.download(url, tmp_path) - if self.verbose: - print("renaming %s to %s" % (tmp_path, local_path)) - os.rename(tmp_path, local_path) - - def download(self, url, local_path): - "Raw download of URL to the specified local path and make world readable" - fin = urllib2.urlopen(url) - fout = open(local_path, "w") - while True: - data = fin.read() - if not data: - break - fout.write(data) - fin.close() - fout.close() - os.chmod(local_path, 0644) - - def run_checker(self, data_file, cf_version=None): - "Run the CF checker on specified file against specified CF version" - command = [self.checker_command, - "-s", self.standard_names_path, - "-a", self.area_types_path, - "-u", self.udunits_path] - if cf_version: - command += ["-v", cf_version] - command.append(data_file) - if self.verbose: - print("running command: ", string.join(command)) - # for clarity, flush before running command - sys.stdout.flush() - sys.stderr.flush() - status = self.run_command(command) - if self.verbose: - print("checker returned status %s" % status) - return status - - def run_command(self, command): - "run command, return status" - # for clarity, flush before running command - sys.stdout.flush() - sys.stderr.flush() - return subprocess.call(command) - - -if __name__ == '__main__': - - w = CfchecksWrapper() - w.main()