This script can easily be adapted to code, recode, or otherwise modify CSV files before loading them into R.
#!/usr/local/bin/python
# transform a raw data file into a happy proto data frame
#
import csv
# Verbs searched for (as substrings) in each row's 'ResponseID' cell; the
# first one found, in this order, labels the row.
VERBS = (
    'mentioned',
    'indicated',
    'proposed',
    'suggested',
    'recommended',
    'confessed',
    'stated',
    'announced',
    'muttered',
    'explained',
)

INPUT_NAME = 'transposed.csv'
OUTPUT_NAME = 'reshaped.csv'
# Subject columns are looked up in the header as 'ID.1' .. 'ID.<MAX_SUBJECTS>'.
MAX_SUBJECTS = 192
OUTPUT_HEADER = ['subject', 'score', 'shift', 'length', 'verb']


def _cell(row, col):
    """Return ``row[col]``, or ``''`` when the row is too short to have it."""
    return row[col] if col < len(row) else ''


def _parse_condition(code):
    """Split a condition code such as ``'S12'`` into ``('S', '12')``.

    Returns ``None`` when *code* is empty or does not start with ``N`` or
    ``S``, so the caller can skip the row instead of reusing the previous
    row's values.
    """
    if code[:1] not in ('N', 'S'):
        return None
    return code[0], code[1:]


def _find_verb(sentence):
    """Return the first entry of ``VERBS`` contained in *sentence*, or ``None``.

    We'd be in trouble if a sentence contained more than one verb;
    fortunately, they don't.
    """
    return next((verb for verb in VERBS if verb in sentence), None)


def reshape(in_name=INPUT_NAME, out_name=OUTPUT_NAME,
            max_subjects=MAX_SUBJECTS):
    """Transform the raw data file into a long-format proto data frame.

    Each input row supplies a condition code in column ``V1`` (``N`` or ``S``
    followed by a length) and a sentence in column ``ResponseID``.  For every
    subject column ``ID.<n>`` whose cell is all digits, one output row
    ``subject, score, shift, length, verb`` is written.

    Rows are skipped when the condition code is missing or does not start
    with ``N``/``S``, or when the sentence contains none of ``VERBS``.
    Subject columns absent from the header, and cells missing from short
    rows, are treated as "no response".

    Requires Python 3 (files are opened with ``newline=''`` as the csv
    module expects).

    Args:
        in_name: path of the CSV file to read.
        out_name: path of the CSV file to write (overwritten).
        max_subjects: highest subject number to look for in the header.

    Returns:
        The number of data rows written (the header row is not counted).

    Raises:
        ValueError: if the header has no ``V1`` or ``ResponseID`` column.
    """
    with open(in_name, newline='') as src, \
            open(out_name, 'w', newline='') as dst:
        reader = csv.reader(src, delimiter=',')
        writer = csv.writer(dst, delimiter=',')
        writer.writerow(OUTPUT_HEADER)

        header = next(reader, None)
        if header is None:
            # Empty input: nothing to reshape, leave a header-only file.
            return 0

        # Resolve every column position once instead of once per cell.
        code_col = header.index('V1')
        sentence_col = header.index('ResponseID')
        first_col = {}
        for col, name in enumerate(header):
            # setdefault keeps the first occurrence, like list.index does.
            first_col.setdefault(name, col)
        subject_cols = [
            (subject, first_col['ID.' + str(subject)])
            for subject in range(1, max_subjects + 1)
            if 'ID.' + str(subject) in first_col
        ]

        written = 0
        for row in reader:
            condition = _parse_condition(_cell(row, code_col))
            verb = _find_verb(_cell(row, sentence_col))
            if condition is None or verb is None:
                # Not a recognisable item row; never emit stale labels.
                continue
            shift, length = condition
            for subject, col in subject_cols:
                score = _cell(row, col)
                # Only all-digit cells count as a response from this subject.
                if score.isdigit():
                    writer.writerow([subject, score, shift, length, verb])
                    written += 1
    return written


if __name__ == '__main__':
    reshape()