July 11, 2014
This script can easily be adapted to code, recode, or otherwise modify CSV files before loading them into R.
#!/usr/local/bin/python
"""Transform a raw data file into a happy proto data frame.

Reads ``transposed.csv``, whose first row is a header.  Every following row
is expected to carry:

* a condition code in column ``V1``: the letter ``N`` or ``S`` (the shift
  value) followed by the length value, e.g. ``N3``;
* the sentence text in column ``ResponseID``, which is searched for one of
  the target verbs;
* one score per subject in columns ``ID.1`` ... ``ID.192``.

Writes ``reshaped.csv`` in long format, one line per (row, subject) pair whose
score cell consists only of digits, with the columns
``subject, score, shift, length, verb``.

Requires Python 3 (``next(reader)`` and ``open(..., newline='')``).
"""

import csv
from typing import Iterable, Iterator, List, Optional, Sequence, Tuple

IN_NAME = 'transposed.csv'
OUT_NAME = 'reshaped.csv'
OUT_HEADER = ['subject', 'score', 'shift', 'length', 'verb']

VERBS = (
    'mentioned',
    'indicated',
    'proposed',
    'suggested',
    'recommended',
    'confessed',
    'stated',
    'announced',
    'muttered',
    'explained',
)

# Subject columns are looked up in the header as 'ID.1' ... 'ID.192'.
SUBJECT_IDS = range(1, 193)


def parse_condition(code: str) -> Optional[Tuple[str, str]]:
    """Split a condition code into ``(shift, length)``.

    The first character must be ``'N'`` or ``'S'``; everything after it is
    returned unchanged as the length.  Returns ``None`` for an empty or
    unrecognised code so the caller can skip the row instead of reusing the
    previous row's values.
    """
    if code[:1] in ('N', 'S'):
        return code[0], code[1:]
    return None


def find_verb(sentence: str, verbs: Sequence[str] = VERBS) -> Optional[str]:
    """Return the first verb in *verbs* that occurs in *sentence*, else ``None``.

    We're in trouble if one of the sentences contains more than one verb;
    fortunately, they don't.
    """
    return next((verb for verb in verbs if verb in sentence), None)


def reshape_rows(
    header: Sequence[str],
    rows: Iterable[Sequence[str]],
    verbs: Sequence[str] = VERBS,
    subject_ids: Iterable[int] = SUBJECT_IDS,
) -> Iterator[List[object]]:
    """Yield ``[subject, score, shift, length, verb]`` for every usable cell.

    Args:
        header: Column names of the input file.
        rows: Data rows (lists of strings) following the header.
        verbs: Target verbs to look for in the ``ResponseID`` cell.
        subject_ids: Subject numbers; subject ``n`` is read from column
            ``'ID.<n>'``.  Subjects whose column is absent from *header* are
            ignored.

    Raises:
        ValueError: If *header* has no ``V1`` or ``ResponseID`` column.

    Rows are skipped when they are too short to hold the condition or
    sentence cell, when the condition code is empty or unrecognised, or when
    no target verb occurs in the sentence.  Cells that are missing or do not
    consist solely of digits are skipped individually.
    """
    # Resolve column positions once rather than for every row and subject.
    condition_col = header.index('V1')
    sentence_col = header.index('ResponseID')
    subject_cols = []
    for subject in subject_ids:
        name = 'ID.' + str(subject)
        if name in header:
            subject_cols.append((subject, header.index(name)))

    for row in rows:
        # csv.reader yields [] for blank lines; short rows cannot be indexed.
        if len(row) <= max(condition_col, sentence_col):
            continue

        condition = parse_condition(row[condition_col])
        if condition is None:
            continue
        shift, length = condition

        verb = find_verb(row[sentence_col], verbs)
        if verb is None:
            # Without a target verb the row cannot be labelled; writing it
            # with the previous row's verb would corrupt the data.
            continue

        for subject, col in subject_cols:
            if col >= len(row):
                # that cell is empty for this subject
                continue
            score = row[col]
            # if this subject responded to this item add data point to file
            if score.isdigit():
                yield [subject, score, shift, length, verb]


def main(in_name: str = IN_NAME, out_name: str = OUT_NAME) -> None:
    """Read *in_name*, reshape it and write the result to *out_name*.

    Raises:
        ValueError: If *in_name* is empty or lacks a required column.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(in_name, newline='') as src:
        reader = csv.reader(src, delimiter=',')
        header = next(reader, None)
        if header is None:
            raise ValueError('%s is empty: no header row found' % in_name)

        # The output is opened only after the header was read successfully.
        with open(out_name, 'w', newline='') as dst:
            writer = csv.writer(dst, delimiter=',')
            writer.writerow(OUT_HEADER)
            writer.writerows(reshape_rows(header, reader))


if __name__ == '__main__':
    main()