Slopegraphs in Python – The Great Refactor

Despite being on holiday, I had a spare hour to refactor the code (@mrshrbrmstr was joining the 1% in the hotel spa). It’s up on GitHub and now sports a spiffy JSON-format config file. You must now execute the slopegraph.py script with a “--config FILENAME” argument. The configuration file lets you specify the “theme” as well as the input file and output format (you can only use PDF for the moment).

Here’s a sample config file included in the github push (there’s another one there too):

  1. {
  2.  
  3. "font_family" : "Palatino",
  4. "font_size" : "20",
  5.  
  6. "x_margin" : "20",
  7. "y_margin" : "30",
  8.  
  9. "line_width" : "0.5",
  10.  
  11. "background_color" : "DEC299",
  12. "label_color" : "687D64",
  13. "value_color" : "949258",
  14. "slope_color" : "61514C",
  15.  
  16. "value_format_string" : "%2d",
  17.  
  18. "input" : "television.csv",
  19. "output" : "television",
  20. "format" : "pdf"
  21.  
  22. }

Included in the refactor is the ability to use a sprintf-like format string for the label value output to make the slopegraphs a tad prettier. Also included with the refactor is a new limitation of the CSV file requiring a

"LABEL, VALUE, VALUE"

format in preparation for support for multiple columns. As @jayjacobs said to me, it’s easy to reformat data into the CSV file format, and he’s right (as usual).

Plans for the next revision include:

  • Specifying a transparent background
  • Specifying PDF|PS|SVG|PNG format output
  • Allowing for an arbitrary number of columns for the slopegraph
  • Optional column labels as well as slopegraph title (with theming)
  • Line color change by slope up/same/down value (will most likely be pushed out, tho)

Here’s the whole source:

  1. import csv
  2. import cairo
  3. import argparse
  4. import json
  5.  
  6. def split(input, size):
  7. 	return [input[start:start+size] for start in range(0, len(input), size)]
  8.  
  9. class Slopegraph:
  10.  
  11. 	SLOPEGRAPH_CANVAS_SIZE = 300
  12.  
  13. 	starts = {} # starting "points"
  14. 	ends = {} # ending "points"
  15. 	pairs = [] # base pair array for the final plotting
  16.  
  17. 	def readCSV(self, filename):
  18.  
  19. 		slopeReader = csv.reader(open(filename, 'rb'), delimiter=',', quotechar='"')
  20.  
  21. 		for row in slopeReader:
  22.  
  23. 			# add chosen values (need start/end for each CSV row) to the final plotting array.
  24.  
  25. 			lab = row[0] # label
  26. 			beg = float(row[1]) # left vals
  27. 			end = float(row[2]) # right vals
  28.  
  29. 			self.pairs.append( (float(beg), float(end)) )
  30.  
  31. 			# combine labels of common values into one string
  32.  
  33. 			if beg in self.starts:
  34. 				self.starts[beg] = self.starts[beg] + "; " + lab
  35. 			else:
  36. 				self.starts[beg] = lab
  37.  
  38.  
  39. 			if end in self.ends:
  40. 				self.ends[end] = self.ends[end] + "; " + lab
  41. 			else:
  42. 				self.ends[end] = lab
  43.  
  44.  
  45. 	def sortKeys(self):
  46.  
  47. 		# sort all the values (in the event the CSV wasn't) so
  48. 		# we can determine the smallest increment we need to use
  49. 		# when stacking the labels and plotting points
  50.  
  51. 		self.startSorted = [(k, self.starts[k]) for k in sorted(self.starts)]
  52. 		self.endSorted = [(k, self.ends[k]) for k in sorted(self.ends)]
  53.  
  54. 		self.startKeys = sorted(self.starts.keys())
  55. 		self.delta = max(self.startSorted)
  56. 		for i in range(len(self.startKeys)):
  57. 			if (i+1 <= len(self.startKeys)-1):
  58. 				currDelta = float(self.startKeys[i+1]) - float(self.startKeys[i])
  59. 				if (currDelta < self.delta):
  60. 					self.delta = currDelta
  61.  
  62. 		self.endKeys = sorted(self.ends.keys())
  63. 		for i in range(len(self.endKeys)):
  64. 			if (i+1 <= len(self.endKeys)-1):
  65. 				currDelta = float(self.endKeys[i+1]) - float(self.endKeys[i])
  66. 				if (currDelta < self.delta):
  67. 					self.delta = currDelta
  68.  
  69.  
  70. 	def findExtremes(self):
  71.  
  72. 		# we also need to find the absolute min & max values
  73. 		# so we know how to scale the plots
  74.  
  75. 		self.lowest = min(self.startKeys)
  76. 		if (min(self.endKeys) < self.lowest) : self.lowest = min(self.endKeys)
  77.  
  78. 		self.highest = max(self.startKeys)
  79. 		if (max(self.endKeys) > self.highest) : self.highest = max(self.endKeys)
  80.  
  81. 		self.delta = float(self.delta)
  82. 		self.lowest = float(self.lowest)
  83. 		self.highest = float(self.highest)
  84.  
  85.  
  86. 	def calculateExtents(self, filename, format, valueFormatString):
  87.  
  88. 		surface = cairo.PDFSurface (filename, 8.5*72, 11*72)
  89. 		cr = cairo.Context (surface)
  90. 		cr.save()
  91. 		cr.select_font_face(self.FONT_FAMILY, cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_NORMAL)
  92. 		cr.set_font_size(self.FONT_SIZE)
  93. 		cr.set_line_width(self.LINE_WIDTH)
  94.  
  95. 		# find the *real* maximum label width (not just based on number of chars)
  96.  
  97. 		maxLabelWidth = 0
  98. 		maxNumWidth = 0
  99.  
  100. 		for k in sorted(self.startKeys):
  101. 			s1 = self.starts[k]
  102. 			xbearing, ybearing, self.sWidth, self.sHeight, xadvance, yadvance = (cr.text_extents(s1))
  103. 			if (self.sWidth > maxLabelWidth) : maxLabelWidth = self.sWidth
  104. 			xbearing, ybearing, self.startMaxLabelWidth, startMaxLabelHeight, xadvance, yadvance = (cr.text_extents(valueFormatString % (k)))
  105. 			if (self.startMaxLabelWidth > maxNumWidth) : maxNumWidth = self.startMaxLabelWidth
  106.  
  107. 		self.sWidth = maxLabelWidth
  108. 		self.startMaxLabelWidth = maxNumWidth
  109.  
  110. 		maxLabelWidth = 0
  111. 		maxNumWidth = 0
  112.  
  113. 		for k in sorted(self.endKeys):
  114. 			e1 = self.ends[k]
  115. 			xbearing, ybearing, self.eWidth, eHeight, xadvance, yadvance = (cr.text_extents(e1))
  116. 			if (self.eWidth > maxLabelWidth) : maxLabelWidth = self.eWidth
  117. 			xbearing, ybearing, self.endMaxLabelWidth, endMaxLabelHeight, xadvance, yadvance = (cr.text_extents(valueFormatString % (k)))
  118. 			if (self.endMaxLabelWidth > maxNumWidth) : maxNumWidth = self.endMaxLabelWidth
  119.  
  120. 		self.eWidth = maxLabelWidth
  121. 		self.endMaxLabelWidth = maxNumWidth	
  122.  
  123. 		cr.restore()
  124. 		cr.show_page()
  125. 		surface.finish()
  126.  
  127. 		self.width = self.X_MARGIN + self.sWidth + self.SPACE_WIDTH + self.startMaxLabelWidth + self.SPACE_WIDTH + self.SLOPEGRAPH_CANVAS_SIZE + self.SPACE_WIDTH + self.endMaxLabelWidth + self.SPACE_WIDTH + self.eWidth + self.X_MARGIN ;
  128. 		self.height = (self.Y_MARGIN * 2) + (((self.highest - self.lowest) / self.delta) * self.LINE_HEIGHT)
  129.  
  130.  
  131. 	def makeSlopegraph(self, filename, config):
  132.  
  133. 		(lab_r,lab_g,lab_b) = split(config["label_color"],2)
  134. 		(val_r,val_g,val_b) = split(config["value_color"],2)
  135. 		(line_r,line_g,line_b) = split(config["slope_color"],2)
  136. 		(bg_r,bg_g,bg_b) = split(config["background_color"],2)
  137.  
  138. 		LAB_R = (int(lab_r, 16)/255.0)
  139. 		LAB_G = (int(lab_g, 16)/255.0)
  140. 		LAB_B = (int(lab_b, 16)/255.0)
  141.  
  142. 		VAL_R = (int(val_r, 16)/255.0)
  143. 		VAL_G = (int(val_g, 16)/255.0)
  144. 		VAL_B = (int(val_b, 16)/255.0)
  145.  
  146. 		LINE_R = (int(line_r, 16)/255.0)
  147. 		LINE_G = (int(line_g, 16)/255.0)
  148. 		LINE_B = (int(line_b, 16)/255.0)
  149.  
  150. 		BG_R = (int(bg_r, 16)/255.0)
  151. 		BG_G = (int(bg_g, 16)/255.0)
  152. 		BG_B = (int(bg_b, 16)/255.0)
  153.  
  154. 		surface = cairo.PDFSurface (filename, self.width, self.height)
  155. 		cr = cairo.Context(surface)
  156.  
  157. 		cr.save()
  158.  
  159. 		cr.select_font_face(self.FONT_FAMILY, cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_NORMAL)
  160. 		cr.set_font_size(self.FONT_SIZE)
  161.  
  162. 		cr.set_line_width(self.LINE_WIDTH)
  163.  
  164. 		cr.set_source_rgb(BG_R,BG_G,BG_B)
  165. 		cr.rectangle(0,0,self.width,self.height)
  166. 		cr.fill()
  167.  
  168. 		# draw start labels at the correct positions
  169.  
  170. 		valueFormatString = config["value_format_string"]
  171.  
  172. 		for k in sorted(self.startKeys):
  173.  
  174. 			val = float(k)
  175. 			label = self.starts[k]
  176. 			xbearing, ybearing, lWidth, lHeight, xadvance, yadvance = (cr.text_extents(label))
  177. 			xbearing, ybearing, kWidth, kHeight, xadvance, yadvance = (cr.text_extents(valueFormatString % (val)))
  178.  
  179. 			cr.set_source_rgb(LAB_R,LAB_G,LAB_B)
  180. 			cr.move_to(self.X_MARGIN + (self.sWidth - lWidth), self.Y_MARGIN + (self.highest - val) * self.LINE_HEIGHT * (1/self.delta))
  181. 			cr.show_text(label)
  182.  
  183. 			cr.set_source_rgb(VAL_R,VAL_G,VAL_B)
  184. 			cr.move_to(self.X_MARGIN + self.sWidth + self.SPACE_WIDTH + (self.startMaxLabelWidth - kWidth), self.Y_MARGIN + (self.highest - val) * self.LINE_HEIGHT * (1/self.delta))
  185. 			cr.show_text(valueFormatString % (val))
  186.  
  187. 			cr.stroke()
  188.  
  189. 		# draw end labels at the correct positions
  190.  
  191. 		for k in sorted(self.endKeys):
  192.  
  193. 			val = float(k)
  194. 			label = self.ends[k]
  195. 			xbearing, ybearing, lWidth, lHeight, xadvance, yadvance = (cr.text_extents(label))
  196.  
  197. 			cr.set_source_rgb(VAL_R,VAL_G,VAL_B)
  198. 			cr.move_to(self.width - self.X_MARGIN - self.SPACE_WIDTH - self.eWidth - self.SPACE_WIDTH - self.endMaxLabelWidth, self.Y_MARGIN + (self.highest - val) * self.LINE_HEIGHT * (1/self.delta))
  199. 			cr.show_text(valueFormatString % (val))
  200.  
  201. 			cr.set_source_rgb(LAB_R,LAB_G,LAB_B)
  202. 			cr.move_to(self.width - self.X_MARGIN - self.SPACE_WIDTH - self.eWidth, self.Y_MARGIN + (self.highest - val) * self.LINE_HEIGHT * (1/self.delta))
  203. 			cr.show_text(label)
  204.  
  205. 			cr.stroke()
  206.  
  207. 		# do the actual plotting
  208.  
  209. 		cr.set_line_width(self.LINE_WIDTH)
  210. 		cr.set_source_rgb(LINE_R, LINE_G, LINE_B)
  211.  
  212. 		for s1,e1 in self.pairs:
  213. 			cr.move_to(self.X_MARGIN + self.sWidth + self.SPACE_WIDTH + self.startMaxLabelWidth + self.LINE_START_DELTA, self.Y_MARGIN + (self.highest - s1) * self.LINE_HEIGHT * (1/self.delta) - self.LINE_HEIGHT/4)
  214. 			cr.line_to(self.width - self.X_MARGIN - self.eWidth - self.SPACE_WIDTH - self.endMaxLabelWidth - self.LINE_START_DELTA, self.Y_MARGIN + (self.highest - e1) * self.LINE_HEIGHT * (1/self.delta) - self.LINE_HEIGHT/4)
  215. 			cr.stroke()
  216.  
  217. 		cr.restore()
  218. 		cr.show_page()
  219. 		surface.finish()	
  220.  
  221.  
  222. 	def __init__(self, config):
  223.  
  224. 		# a couple methods need these so make them local to the class
  225.  
  226. 		self.FONT_FAMILY = config["font_family"]
  227. 		self.LINE_WIDTH = float(config["line_width"])
  228. 		self.X_MARGIN = float(config["x_margin"])
  229. 		self.Y_MARGIN = float(config["y_margin"])
  230. 		self.FONT_SIZE = float(config["font_size"])
  231. 		self.SPACE_WIDTH = self.FONT_SIZE / 2.0
  232. 		self.LINE_HEIGHT = self.FONT_SIZE + (self.FONT_SIZE / 2.0)
  233. 		self.LINE_START_DELTA = 1.5*self.SPACE_WIDTH
  234.  
  235. 		OUTPUT_FILE = config["output"] + "." + config["format"]
  236.  
  237. 		# process the values & make the slopegraph
  238.  
  239. 		self.readCSV(config["input"])
  240. 		self.sortKeys()
  241. 		self.findExtremes()
  242. 		self.calculateExtents(OUTPUT_FILE, config["format"], config["value_format_string"])
  243. 		self.makeSlopegraph(OUTPUT_FILE, config)
  244.  
  245.  
  246. def main():
  247.  
  248. 	parser = argparse.ArgumentParser(description="Creates a slopegraph from a CSV source")
  249. 	parser.add_argument("--config",required=True,
  250. 					help="config file name to use for  slopegraph creation",)
  251. 	args = parser.parse_args()
  252.  
  253. 	if args.config:
  254.  
  255. 		json_data = open(args.config)
  256. 		config = json.load(json_data)
  257. 		json_data.close()
  258.  
  259. 		Slopegraph(config)
  260.  
  261. 	return(0)
  262.  
  263. if __name__ == "__main__":
  264. 	main()
Cover image from Data-Driven Security
Amazon Author Page

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.