#!/usr/bin/env python3
# Convenience script to undo the HDF5 HDFS test setup from setup.py.
# Call with at least one of the --shutdown and --remove flags to have any effect.
#
# Assumes that HDFS is installed and configured, and that JAVA_HOME and
# HADOOP_HOME are valid environment variables.
import os
import sys
import yaml
from argparse import ArgumentParser
from os import environ
import os.path as ospath
from subprocess import call


def orig_script():
    HDFS_TESTDIR = "/tmp"
    TEST_FILES = [
        "Poe_Raven.txt",
        "t.h5",
        "t8.shakespeare.txt",
    ]

    try:
        JAVA_HOME = environ["JAVA_HOME"]
        assert ospath.isdir(JAVA_HOME), "JAVA_HOME must be a directory"
    except KeyError:
        print("JAVA_HOME not set as environment variable!")
        sys.exit(1)

    try:
        HADOOP_HOME = environ["HADOOP_HOME"]
        assert ospath.isdir(HADOOP_HOME), "HADOOP_HOME must be a directory"
    except KeyError:
        print("HADOOP_HOME not set as environment variable!")
        sys.exit(1)

    _hdfs = ospath.join(HADOOP_HOME, "bin", "hdfs")

    parser = ArgumentParser(
        description="Call with at least one of the provided flags")
    parser.add_argument(
        "--shutdown", action="store_true", help="Take down HDFS instance")
    parser.add_argument(
        "--remove", action="store_true", help="Remove files from HDFS")
    args = parser.parse_args()

    if args.remove:
        # Remove the test files, then the (now empty) test directory.
        rmfiles = ' '.join([f"{HDFS_TESTDIR}/{file}" for file in TEST_FILES])
        call(f"{_hdfs} dfs -rm {rmfiles}".split())
        call(f"{_hdfs} dfs -rmdir {HDFS_TESTDIR}".split())

    if args.shutdown:
        # Stop the HDFS daemons started by the test setup.
        call(f"{_hdfs} --daemon stop namenode".split())
        call(f"{_hdfs} --daemon stop secondarynamenode".split())
        call(f"{_hdfs} --daemon stop datanode".split())

    print("DONE")


def main(argv):
    if len(argv) < 5:
        print(f"usage: {argv[0]} CONFIGFILE BUILDCONFIG TPARAMS SLAVEOS")
        sys.exit(1)
    print('{}:{}:{}:{}:{}'.format(argv[0], argv[1], argv[2], argv[3], argv[4]))
    configfile = argv[1]
    buildconfig = argv[2]
    tparams = argv[3]
    slaveos = argv[4]  # currently unused

    with open(configfile) as yaml_file:
        yaml_data = yaml.safe_load(yaml_file)
    # print(yaml_data)

    currdir = os.getcwd()
    print('current={}'.format(currdir))
    HDFS_TESTDIR = os.path.join(currdir, 'tmp')

    # Collect the test files listed under the requested build configuration.
    rmfiles = None
    for tparam in yaml_data['configfile'][buildconfig][tparams]:
        if 'testparams' in tparam:
            testfiles = yaml_data['configfile'][buildconfig][tparams]['testfiles']
            # print('tconf env == {}'.format(testfiles))
            rmfiles = ' '.join([f"{HDFS_TESTDIR}/{file}" for file in testfiles])

    if rmfiles is None:
        print("No 'testparams' entry found in {}; nothing to remove".format(configfile))
        sys.exit(1)

    try:
        JAVA_HOME = environ["JAVA_HOME"]
        assert ospath.isdir(JAVA_HOME), "JAVA_HOME must be a directory"
    except KeyError:
        print("JAVA_HOME not set as environment variable!")
        sys.exit(1)

    try:
        HADOOP_HOME = environ["HADOOP_HOME"]
        assert ospath.isdir(HADOOP_HOME), "HADOOP_HOME must be a directory"
    except KeyError:
        print("HADOOP_HOME not set as environment variable!")
        sys.exit(1)

    _hdfs = ospath.join(HADOOP_HOME, "bin", "hdfs")

    # Remove the test files and directory, then stop the HDFS daemons.
    call(f"{_hdfs} dfs -rm {rmfiles}".split())
    call(f"{_hdfs} dfs -rmdir {HDFS_TESTDIR}".split())
    call(f"{_hdfs} --daemon stop namenode".split())
    call(f"{_hdfs} --daemon stop secondarynamenode".split())
    call(f"{_hdfs} --daemon stop datanode".split())

    print("DONE")


if __name__ == '__main__':
    main(sys.argv)
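
# ---------------------------------------------------------------------------
# A minimal sketch of the YAML layout main() appears to expect, inferred from
# the key lookups above. Only the 'configfile', 'testparams', and 'testfiles'
# keys come from the code itself; the build-config and parameter-key names
# below are illustrative assumptions, not values from a real config:
#
#   configfile:
#     my_buildconfig:            # passed as BUILDCONFIG (argv[2])
#       my_tparams:              # passed as TPARAMS (argv[3])
#         testparams: {}         # presence of this key triggers the cleanup
#         testfiles:
#           - Poe_Raven.txt
#           - t.h5
#           - t8.shakespeare.txt
#
# Hypothetical invocation under that layout (script and file names are
# placeholders):
#
#   python teardown.py config.yml my_buildconfig my_tparams centos7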