import configparser
import datetime
import sys
import time
from os import listdir
from os.path import isfile, join

from r2r import R2RClient, R2RException

# Directory locations are read from a config file in the working directory.
cfg = configparser.ConfigParser()
cfg.read('_config.cfg')

client = R2RClient("http://localhost:7272/")
# NOTE(review): presumably this ingests the SDK's bundled sample document on
# every run -- confirm that is intended before running against real data.
client.documents.create_sample(hi_res=True)

data_dir = cfg['DEFAULT']['PDF_DIR']
# Plain string concatenation is kept on purpose: the separator between
# PDF_DIR and each sub-directory has to come from the config values.
dir_paths = [
    data_dir + cfg['genetics']['diabetes'],
    data_dir + cfg['genetics']['aging'],
]
print("The defined directory paths {0}".format(dir_paths))


def ingest_files(client, sleep_time, doc_list):
    """Ingest each file in *doc_list* and return the successful responses.

    Args:
        client: Object exposing ``documents.create(file_path=...)``.
        sleep_time: Seconds to pause after every file, whether or not the
            ingestion succeeded, so the API is not called too quickly.
        doc_list: Iterable of file paths to ingest.

    Returns:
        A list with the response of every ``documents.create`` call that did
        not raise ``R2RException``, in input order. Failed files are reported
        on stdout and skipped.
    """
    responses = []
    for the_doc in doc_list:
        try:
            resp = client.documents.create(file_path=the_doc)
        except R2RException as exc:
            # Best effort: report the failure and carry on with the next file.
            print("Problem ingesting {0}\n\t{1}".format(the_doc, exc))
        else:
            print("Ingested {0} with the following response {1}".format(the_doc, resp))
            responses.append(resp)
        # brace against pinging API too quickly
        time.sleep(sleep_time)
    return responses


def create_file_list(the_dir):
    """Return the paths of the regular files directly inside *the_dir*.

    Sub-directories are skipped and not descended into. Each returned path is
    *the_dir* joined with the entry name, in ``os.listdir`` order.
    """
    return [join(the_dir, f) for f in listdir(the_dir) if isfile(join(the_dir, f))]


def ingest_all_files(client, dir_list, sleep_time=3):
    """Ingest every file found directly inside each directory of *dir_list*.

    Prints the start timestamp, then processes the directories in order.

    Args:
        client: Object exposing ``documents.create(file_path=...)``.
        dir_list: Iterable of directory paths to scan.
        sleep_time: Seconds to pause after each file (default 3).

    Returns:
        A list with one entry per directory; each entry is the list returned
        by ``ingest_files`` for that directory.
    """
    print(datetime.datetime.now())
    result = []
    for the_dir in dir_list:
        the_list = create_file_list(the_dir)
        result.append(ingest_files(client, sleep_time, the_list))
    return result


ingest_all_files(client, dir_paths)