Finished graph generation for task 3
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
<property name="dist" location="dist" />
|
||||
|
||||
<!--<property name="hadoop.version" value="2.0.0-mr1-cdh4.1.2" /> -->
|
||||
<property name="hadoop.base.path" value="/usr/local/Cellar/hadoop/2.7.3/" />
|
||||
<property name="hadoop.base.path" value="/usr/lib/hadoop/client/" />
|
||||
|
||||
|
||||
<path id="classpath">
|
||||
@@ -28,7 +28,7 @@
|
||||
<!-- Compile the java code from ${src} into ${build} -->
|
||||
<property name="myclasspath" refid="classpath"/>
|
||||
<echo message="Classpath = ${myclasspath}"/>
|
||||
<javac srcdir="${src}" debug="true" destdir="${build}" target="1.8" source="1.8">
|
||||
<javac srcdir="${src}" debug="true" destdir="${build}" target="1.7" source="1.7">
|
||||
<classpath refid="classpath"/>
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
@@ -11,33 +11,17 @@ def main():
|
||||
# Store each line as a string in a list
|
||||
lines = data.readlines()
|
||||
# Get the highest index of tweets
|
||||
indexes = np.array([x.split()[0] for x in lines], dtype=int)
|
||||
max_ind = np.max(indexes)
|
||||
min_ind = np.min(indexes)
|
||||
labels = [x.split()[0] for x in lines]
|
||||
|
||||
# Create a 2D array of zeros to fill with index-count pairs
|
||||
data = np.zeros([max_ind-min_ind+1, 2], dtype=int)
|
||||
# Fill first column with indexes for each category (1-5, 6-10 etc...)
|
||||
data[:, 0] = np.arange(max_ind-min_ind+1)+1
|
||||
|
||||
labels = [[] for i in xrange(max_ind-min_ind+1)]
|
||||
for line in lines:
|
||||
# Split the line into its two components
|
||||
line = line.split()
|
||||
# Get the index stored in component 1
|
||||
ind = int(line[0])-min_ind
|
||||
if ind < 0:
|
||||
pdb.set_trace()
|
||||
|
||||
# Set column two at the index provided to the value provided
|
||||
data[ind][1] = line[-1]
|
||||
labels[ind] = "{0} {1}".format(*line[:-2])
|
||||
line = line.split()[1:]
|
||||
|
||||
# Create labels for each index to show each group's range
|
||||
|
||||
# Plot data...
|
||||
x = data[:, 0]
|
||||
y = data[:, 1]
|
||||
x = labels
|
||||
y = lines[:, 1]
|
||||
markerline, stemlines, baseline = plt.stem(x, y, '-')
|
||||
plt.xticks(x, labels, rotation='vertical')
|
||||
xmin,xmax = plt.xlim()
|
||||
|
||||
Executable
+35
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import pdb
|
||||
|
||||
|
||||
def main():
    """Plot the per-label counts from the Hadoop job output as a stem chart.

    Reads ``../FinalOutput.txt`` (relative to the current working
    directory). Every non-blank line is expected to hold at least two
    tab-separated fields: a label followed by an integer count. One stem is
    drawn per line, on a logarithmic y axis, with the labels used as
    vertical x tick labels. The figure is displayed with ``plt.show()``.

    Raises:
        IndexError: if a non-blank line contains no tab separator.
        ValueError: if the second field of a line is not an integer.
    """
    # Open final output generated from hadoop
    with open("../FinalOutput.txt") as data:
        # Split every line once, and drop blank lines (e.g. a trailing
        # empty line) so the field lookups below cannot fail on them.
        rows = [line.split('\t') for line in data if line.strip()]

    # The first field of each row is used as that row's tick label
    labels = [row[0] for row in rows]

    # Plot data...
    x = np.arange(len(labels))
    # int() tolerates the trailing newline left on the last field
    y = np.array([int(row[1]) for row in rows])
    _markerline, stemlines, _baseline = plt.stem(x, y, '-')
    plt.xticks(x, labels, rotation='vertical')

    # Pad the x range by 2.5% on each side so the outermost stems do not
    # sit on the axes frame
    xmin, xmax = plt.xlim()
    xbuff = 0.025 * (xmax - xmin)
    plt.xlim(xmin - xbuff, xmax + xbuff)

    plt.setp(stemlines, 'color', 'b')
    # NOTE(review): newer Matplotlib releases renamed this keyword to
    # `nonpositive`; confirm against the installed version before changing.
    plt.yscale("log", nonposy='clip')
    plt.grid(True)

    # Leave extra room below the axes for the vertical tick labels
    fig = plt.gcf()
    fig.subplots_adjust(bottom=0.23)
    plt.show()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -46,6 +46,8 @@ public class TweetFreqDayMapper extends Mapper<Object, Text, Text, LongWritable>
|
||||
out = Denonyms.findDenonym(tweet);
|
||||
}
|
||||
}
|
||||
System.out.println(out);
|
||||
System.out.println(tweet);
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user