#!/usr/bin/python # extract-column # Version: 1.1 # Created by: Jim Cameron # Created: October 21, 2016 # Last modified: October 21, 2016 # # Web: http://support.moonpoint.com/languages/python/excel/extract-column.php # Download : http://support.moonpoint.com/downloads/computer_languages/python/extract-column.py # Description: Extract data from the specified column of an Excel spreadsheet. # The data is assumed to be in the first worksheet in the file. The script # will prompt for the file name and the column to be extracted. Column "A" is # regarded as column "1", i.e., column numbers start at 1. The script will also # prompt for a file name for the text file that will hold the extracted data. # Used to check if file exists and is accessible import os.path, sys # Used to read data from the spreadsheet from xlrd import open_workbook,cellname def extractData(spreadsheet,colNum,extractedDataFile): workbook = open_workbook(spreadsheet) sheet = workbook.sheet_by_index(0) print "Worksheet name: ",sheet.name print "Number of entries: ",sheet.nrows # Extract entries from the specified column # outputFile = open(extractedDataFile, 'w') for row_index in range(sheet.nrows): outputFile.write(sheet.cell(row_index,colNum).value + "\n") outputFile.close() def checkInputFile(spreadsheet): # Check if the file exists if os.path.isfile(spreadsheet) is False: print "Error! The specified file does not exist or is not readable." quit() elif not os.access(spreadsheet, os.R_OK): # Check if the file is accessible print "Error! The file is not accessible; check the file permissions" quit() else: return def checkColumn(spreadsheet,colnum): workbook = open_workbook(spreadsheet) sheet = workbook.sheet_by_index(0) if colnum <= 0: print "Error! The column number entered must be 1 or greater." quit() elif colnum > sheet.ncols: print "Error! Specified column", colnum, "but the worksheet has only", sheet.ncols, "columns" quit() else: return # Check whether the script is being run with all the needed arguments already # provided on the command line. The name of the script itself is sys.argv[0], so # 1 is subtracted to count only the number of arguments given to the script. arguments = len(sys.argv)-1 # Display usage information if there is at least 1 parameter entered on # the command line, but less than three parameters provided. if arguments > 0 and arguments < 3: print "extract-column extracts a column of data to a designated text file" print "" print "Usage: extract-column inputFile colNumber outputFile" print "inputFile: spreadsheet file" print "colNumber: column to be extracted; column A is column 1, B is 2, etc." print "outputFile: name of text file to be used to store extracted data" print "" print "If no parameters are specified on the command line, the script" print "will prompt for them." exit() elif arguments == 0 or arguments < 3: # If no parameters have been provided on the command line, or less than 3 # parameters, prompt for the name of the input file, the number of the # column to extract and the name of the file to hold the extracted data spreadsheet = raw_input("Enter spreadsheet file name: ") checkInputFile(spreadsheet) # Ask for the column number to be extracted. For xlrd the first column is # column 0, but a user will more likely consider this column 1, so use 1 # as the first column, but subtract 1 from the value provided by the user # when extracting the data col_index = int(raw_input("Column number (A is column 1): ")) checkColumn(spreadsheet,col_index) outputFile = raw_input("Enter output file name: ") extractData(spreadsheet,col_index - 1,outputFile) else: # All parameters have been provided on the command line checkInputFile(sys.argv[1]) checkColumn(sys.argv[1],int(sys.argv[2])) extractData(sys.argv[1],int(sys.argv[2])-1,sys.argv[3])