1 Resposta
0
The easiest way to do it is through the mrjob module; install it using pip.
Here is some sample code for MapReduce using the mrjob module.
from mrjob.job import MRJob
from mrjob.step import MRStep
# Define the mapper and reducer methods.
# Each one must be registered in the list returned by the steps() method.
# This job uses one mapper and two reducers.
class planesCrashed(MRJob):
    """Count input records per year and emit the result keyed by count.

    Each input line is expected to hold 13 tab-separated fields, in order:
    Date, Time, Location, Operator, Flight, Route, Type, Registration, cnIn,
    Aboard, Fatalities, Ground, Summary.

    Step 1 maps every line to ``(year, 1)`` and sums the ones per year.
    Step 2 receives ``(zero-padded count, year)`` pairs and re-emits them
    unchanged, so the final output is keyed by count rather than by year.
    """

    def steps(self):
        """Return the job's two steps: map + count, then re-emit by count."""
        return [
            MRStep(mapper=self.mapper_get_year,
                   reducer=self.reducer_count_crash),
            MRStep(reducer=self.reducer_count_crash_sort),
        ]

    def mapper_get_year(self, _, line):
        """Yield ``(year, 1)`` for one tab-separated input line.

        The year is whatever follows the last ``/`` in the first field
        (presumably dates look like MM/DD/YYYY -- confirm against the data).

        Raises:
            ValueError: if the line does not contain exactly 13 fields.
        """
        expected_fields = 13
        fields = line.split('\t')
        # Keep the strictness of a 13-way tuple unpack (a malformed row is an
        # error, not something to skip silently) without binding twelve
        # variables that are never read.
        if len(fields) != expected_fields:
            raise ValueError(
                'expected {} tab-separated fields, got {}'.format(
                    expected_fields, len(fields)))
        date = fields[0]
        # The last '/'-separated component of the date is used as the year.
        yield date.split('/')[-1], 1

    def reducer_count_crash(self, key, values):
        """Sum the occurrences of one year and yield ``(padded_count, year)``.

        The count is emitted as a 5-character zero-padded string so that
        string comparison of the keys agrees with numeric comparison (this
        holds for counts below 100000). Presumably this is what lets the next
        step see the counts in ascending order.
        """
        yield str(sum(values)).zfill(5), key

    def reducer_count_crash_sort(self, count, years):
        """Yield one ``(count, year)`` pair per year that shares ``count``."""
        for year in years:
            yield count, year
if __name__ == '__main__':
    # Run the job only when this file is executed as a script, not on import.
    planesCrashed.run()