From 18b405db72fe34b079dd3523d465563465fa1623 Mon Sep 17 00:00:00 2001
From: Alex Goodman <wagoodman@users.noreply.github.com>
Date: Sun, 14 Oct 2018 10:57:59 -0400
Subject: [PATCH] Size based efficiency (#24)

---
 filetree/efficiency.go | 80 ++++++++++++++++++++++++++++++++++++++++++
 filetree/tree.go       | 27 --------------
 filetree/tree_test.go  |  4 +--
 image/image.go         |  4 ---
 image/layer.go         |  7 ++--
 ui/detailsview.go      | 30 +++++++++++++++-
 ui/layerview.go        |  4 +--
 ui/ui.go               |  4 +++
 8 files changed, 121 insertions(+), 39 deletions(-)
 create mode 100644 filetree/efficiency.go

diff --git a/filetree/efficiency.go b/filetree/efficiency.go
new file mode 100644
index 0000000..657f523
--- /dev/null
+++ b/filetree/efficiency.go
@@ -0,0 +1,80 @@
+package filetree
+
+import (
+	"sort"
+)
+
+// EfficiencySlice is a list of per-path efficiency records that can be sorted.
+type EfficiencySlice []*EfficiencyData
+
+// EfficiencyData collects the nodes found at a single path and their sizes.
+type EfficiencyData struct {
+	Path              string
+	Nodes             []*FileNode
+	CumulativeSize    int64
+	minDiscoveredSize int64
+}
+
+// Len, Swap and Less implement sort.Interface, ordering by ascending CumulativeSize.
+func (d EfficiencySlice) Len() int {
+	return len(d)
+}
+func (d EfficiencySlice) Swap(i, j int) {
+	d[i], d[j] = d[j], d[i]
+}
+func (d EfficiencySlice) Less(i, j int) bool {
+	return d[i].CumulativeSize < d[j].CumulativeSize
+}
+
+// Efficiency scores the given layer trees as the sum of the smallest size seen
+// per path divided by the sum of every size seen per path, so files duplicated
+// across layers or removed in a later layer lower the score, weighted by size.
+// It also returns every path visited more than once, by ascending total size.
+func Efficiency(trees []*FileTree) (float64, EfficiencySlice) {
+	efficiencyMap := make(map[string]*EfficiencyData)
+	inefficientMatches := make(EfficiencySlice, 0)
+
+	visitor := func(node *FileNode) error {
+		path := node.Path()
+		size := node.Data.FileInfo.TarHeader.Size
+		data, ok := efficiencyMap[path]
+		if !ok {
+			data = &EfficiencyData{Path: path, minDiscoveredSize: -1}
+			efficiencyMap[path] = data
+		}
+		data.CumulativeSize += size
+		// a negative minimum means no size has been recorded for this path yet
+		if data.minDiscoveredSize < 0 || size < data.minDiscoveredSize {
+			data.minDiscoveredSize = size
+		}
+		data.Nodes = append(data.Nodes, node)
+
+		// report a path exactly once: when its second occurrence is found
+		if len(data.Nodes) == 2 {
+			inefficientMatches = append(inefficientMatches, data)
+		}
+		return nil
+	}
+	visitEvaluator := func(node *FileNode) bool {
+		return node.IsLeaf()
+	}
+	for _, tree := range trees {
+		tree.VisitDepthChildFirst(visitor, visitEvaluator)
+	}
+
+	// calculate the score
+	var minimumPathSizes, discoveredPathSizes int64
+	for _, value := range efficiencyMap {
+		minimumPathSizes += value.minDiscoveredSize
+		discoveredPathSizes += value.CumulativeSize
+	}
+	// with no bytes discovered nothing is wasted; this also avoids a NaN from 0/0
+	score := 1.0
+	if discoveredPathSizes > 0 {
+		score = float64(minimumPathSizes) / float64(discoveredPathSizes)
+	}
+	sort.Sort(inefficientMatches)
+	return score, inefficientMatches
+}
+
+
diff --git a/filetree/tree.go b/filetree/tree.go
index a7bfddb..27703d5 100644
--- a/filetree/tree.go
+++ b/filetree/tree.go
@@ -262,30 +262,3 @@ func StackRange(trees []*FileTree, start, stop int) *FileTree {
 
 	return tree
 }
-
-// EfficiencyMap creates a map[string]int showing how often each int
-// appears in the
-func EfficiencyMap(trees []*FileTree) map[string]int {
-	result := make(map[string]int)
-	visitor := func(node *FileNode) error {
-		result[node.Path()]++
-		return nil
-	}
-	visitEvaluator := func(node *FileNode) bool {
-		return node.IsLeaf()
-	}
-	for _, tree := range trees {
-		tree.VisitDepthChildFirst(visitor, visitEvaluator)
-	}
-	return result
-}
-
-func EfficiencyScore(trees []*FileTree) float64 {
-	efficiencyMap := EfficiencyMap(trees)
-	uniquePaths := len(efficiencyMap)
-	pathAppearances := 0
-	for _, value := range efficiencyMap {
-		pathAppearances += value
-	}
-	return float64(uniquePaths) / float64(pathAppearances)
-}
diff --git a/filetree/tree_test.go b/filetree/tree_test.go
index 2711ede..8d7eec7 100644
--- a/filetree/tree_test.go
+++ b/filetree/tree_test.go
@@ -554,7 +554,7 @@ func TestEfficiencyScore(t *testing.T) {
 		trees[ix] = tree
 	}
 	expected := 2.0 / 6.0
-	actual := EfficiencyScore(trees)
+	actual, _ := Efficiency(trees)
 	if math.Abs(expected-actual) > 0.0001 {
 		t.Fatalf("Expected %f but got %f", expected, actual)
 	}
@@ -567,7 +567,7 @@ func TestEfficiencyScore(t *testing.T) {
 		trees[ix] = tree
 	}
 	expected = 1.0
-	actual = EfficiencyScore(trees)
+	actual, _ = Efficiency(trees)
 	if math.Abs(expected-actual) > 0.0001 {
 		t.Fatalf("Expected %f but got %f", expected, actual)
 	}
diff --git a/image/image.go b/image/image.go
index b923433..9b2d80f 100644
--- a/image/image.go
+++ b/image/image.go
@@ -17,10 +17,6 @@ import (
 	"golang.org/x/net/context"
 )
 
-const (
-	LayerFormat = "%-25s %5s %7s %s"
-)
-
 func check(e error) {
 	if e != nil {
 		panic(e)
diff --git a/image/layer.go b/image/layer.go
index 85d7ae2..39a88c6 100644
--- a/image/layer.go
+++ b/image/layer.go
@@ -4,10 +4,13 @@ import (
 	"github.com/wagoodman/dive/filetree"
 	"strings"
 	"fmt"
-	"strconv"
 	"github.com/dustin/go-humanize"
 )
 
+const (
+	LayerFormat = "%-25s %7s  %s"
+)
+
 type Layer struct {
 	TarPath  string
 	History ImageHistoryEntry
@@ -35,8 +38,6 @@ func (layer *Layer) String() string {
 
 	return fmt.Sprintf(LayerFormat,
 		layer.Id(),
-		strconv.Itoa(int(100.0*filetree.EfficiencyScore(layer.RefTrees[:layer.Index+1]))) + "%",
-		//"100%",
 		humanize.Bytes(uint64(layer.History.Size)),
 		strings.TrimPrefix(layer.History.CreatedBy, "/bin/sh -c "))
 }
diff --git a/ui/detailsview.go b/ui/detailsview.go
index b215521..61a542d 100644
--- a/ui/detailsview.go
+++ b/ui/detailsview.go
@@ -6,6 +6,9 @@ import (
 	"github.com/jroimartin/gocui"
 	"github.com/lunixbochs/vtclean"
 	"strings"
+	"github.com/wagoodman/dive/filetree"
+	"strconv"
+	"github.com/dustin/go-humanize"
 )
 
 type DetailsView struct {
@@ -13,6 +16,8 @@ type DetailsView struct {
 	gui        *gocui.Gui
 	view       *gocui.View
 	header     *gocui.View
+	efficiency float64
+	inefficiencies filetree.EfficiencySlice
 }
 
 func NewStatisticsView(name string, gui *gocui.Gui) (detailsview *DetailsView) {
@@ -55,13 +60,32 @@ func (view *DetailsView) IsVisible() bool {
 	return true
 }
 
+// we only need to update this view upon the initial tree load
 func (view *DetailsView) Update() error {
+	layerTrees := Views.Tree.RefTrees
+	view.efficiency, view.inefficiencies = filetree.Efficiency(layerTrees[:len(layerTrees)-1])
 	return nil
 }
 
 func (view *DetailsView) Render() error {
 	currentLayer := Views.Layer.currentLayer()
 
+	var wastedSpace int64
+	template := "%5s  %12s  %-s\n"
+	inefficiencyReport := fmt.Sprintf(Formatting.Header(template), "Count", "Total Space", "Path")
+	// list the last entries first: iterate from the final element down to and including index 0
+	for idx := len(view.inefficiencies) - 1; idx >= 0; idx-- {
+		data := view.inefficiencies[idx]
+		if data.CumulativeSize == 0 {
+			continue
+		}
+		wastedSpace += data.CumulativeSize
+		inefficiencyReport += fmt.Sprintf(template, strconv.Itoa(len(data.Nodes)), humanize.Bytes(uint64(data.CumulativeSize)), data.Path)
+	}
+
+	effStr := fmt.Sprintf("\n%s %d %%", Formatting.Header("Image efficiency score:"), int(100.0*view.efficiency))
+	spaceStr := fmt.Sprintf("%s %s\n", Formatting.Header("Potential wasted space:"),  humanize.Bytes(uint64(wastedSpace)))
+
 	view.gui.Update(func(g *gocui.Gui) error {
 		// update header
 		view.header.Clear()
@@ -71,9 +95,13 @@ func (view *DetailsView) Render() error {
 
 		// update contents
 		view.view.Clear()
-		fmt.Fprintln(view.view, Formatting.Header("Command"))
+		fmt.Fprintln(view.view, Formatting.Header("Layer Command"))
 		fmt.Fprintln(view.view, currentLayer.History.CreatedBy)
 
+		fmt.Fprintln(view.view, effStr)
+		fmt.Fprintln(view.view, spaceStr)
+
+		fmt.Fprintln(view.view, inefficiencyReport)
 		return nil
 	})
 	return nil
diff --git a/ui/layerview.go b/ui/layerview.go
index b36f6ea..1815d86 100644
--- a/ui/layerview.go
+++ b/ui/layerview.go
@@ -128,7 +128,7 @@ func (view *LayerView) Render() error {
 		view.header.Clear()
 		width, _ := g.Size()
 		headerStr := fmt.Sprintf("[%s]%s\n", title, strings.Repeat("─", width*2))
-		headerStr += fmt.Sprintf("Cmp "+image.LayerFormat, "Image ID", "%Eff.", "Size", "Command")
+		headerStr += fmt.Sprintf("Cmp "+image.LayerFormat, "Image ID", "Size", "Command")
 		fmt.Fprintln(view.header, Formatting.Header(vtclean.Clean(headerStr, false)))
 
 		// update contents
@@ -146,7 +146,7 @@ func (view *LayerView) Render() error {
 					layerId = fmt.Sprintf("%-25s", layer.History.ID)
 				}
 
-				layerStr = fmt.Sprintf(image.LayerFormat, layerId, "", humanize.Bytes(uint64(layer.History.Size)), "FROM "+layer.Id())
+				layerStr = fmt.Sprintf(image.LayerFormat, layerId, humanize.Bytes(uint64(layer.History.Size)), "FROM "+layer.Id())
 			}
 
 			compareBar := view.renderCompareBar(idx)
diff --git a/ui/ui.go b/ui/ui.go
index ca7cd04..bc4e6fa 100644
--- a/ui/ui.go
+++ b/ui/ui.go
@@ -311,6 +311,10 @@ func Run(layers []*image.Layer, refTrees []*filetree.FileTree) {
 	//g.Mouse = true
 	g.SetManagerFunc(layout)
 
+	// perform the first update and render now that all resources have been loaded
+	Update()
+	Render()
+
 	// let the default position of the cursor be the last layer
 	// Views.Layer.SetCursor(len(Views.Layer.Layers)-1)