From 18b405db72fe34b079dd3523d465563465fa1623 Mon Sep 17 00:00:00 2001
From: Alex Goodman <wagoodman@users.noreply.github.com>
Date: Sun, 14 Oct 2018 10:57:59 -0400
Subject: [PATCH] Size based efficiency (#24)

---
 filetree/efficiency.go | 80 ++++++++++++++++++++++++++++++++++++++++++
 filetree/tree.go       | 27 --------------
 filetree/tree_test.go  |  4 +--
 image/image.go         |  4 ---
 image/layer.go         |  7 ++--
 ui/detailsview.go      | 30 +++++++++++++++-
 ui/layerview.go        |  4 +--
 ui/ui.go               |  4 +++
 8 files changed, 121 insertions(+), 39 deletions(-)
 create mode 100644 filetree/efficiency.go

diff --git a/filetree/efficiency.go b/filetree/efficiency.go
new file mode 100644
index 0000000..657f523
--- /dev/null
+++ b/filetree/efficiency.go
@@ -0,0 +1,80 @@
+package filetree
+
+import (
+	"sort"
+)
+
+// EfficiencySlice is a sortable list of per-path records, ordered by ascending CumulativeSize.
+type EfficiencySlice []*EfficiencyData
+
+// EfficiencyData collects every leaf node visited at Path along with the sizes of those nodes.
+type EfficiencyData struct {
+	Path              string
+	Nodes             []*FileNode
+	CumulativeSize    int64
+	minDiscoveredSize int64
+}
+
+func (d EfficiencySlice) Len() int {
+	return len(d)
+}
+
+func (d EfficiencySlice) Swap(i, j int) {
+	d[i], d[j] = d[j], d[i]
+}
+
+func (d EfficiencySlice) Less(i, j int) bool {
+	return d[i].CumulativeSize < d[j].CumulativeSize
+}
+
+// Efficiency returns a score for the given trees (smallest size seen per path divided by
+// all sizes seen per path) along with every path visited more than once, smallest first:
+// 1. Files that are duplicated across layers discount your score, weighted by file size
+// 2. Files that are removed discount your score, weighted by the original file size
+func Efficiency(trees []*FileTree) (float64, EfficiencySlice) {
+	efficiencyMap := make(map[string]*EfficiencyData)
+	inefficientMatches := make(EfficiencySlice, 0)
+
+	visitor := func(node *FileNode) error {
+		path := node.Path()
+		data, ok := efficiencyMap[path]
+		if !ok {
+			data = &EfficiencyData{
+				Path:              path,
+				Nodes:             make([]*FileNode, 0),
+				minDiscoveredSize: -1,
+			}
+			efficiencyMap[path] = data
+		}
+		data.CumulativeSize += node.Data.FileInfo.TarHeader.Size
+		if data.minDiscoveredSize < 0 || node.Data.FileInfo.TarHeader.Size < data.minDiscoveredSize {
+			data.minDiscoveredSize = node.Data.FileInfo.TarHeader.Size
+		}
+		data.Nodes = append(data.Nodes, node)
+
+		if len(data.Nodes) == 2 {
+			inefficientMatches = append(inefficientMatches, data)
+		}
+		return nil
+	}
+	visitEvaluator := func(node *FileNode) bool {
+		return node.IsLeaf()
+	}
+	for _, tree := range trees {
+		tree.VisitDepthChildFirst(visitor, visitEvaluator)
+	}
+
+	// calculate the score; when no bytes were discovered nothing is wasted (and 0/0 is NaN)
+	var minimumPathSizes, discoveredPathSizes int64
+	for _, value := range efficiencyMap {
+		minimumPathSizes += value.minDiscoveredSize
+		discoveredPathSizes += value.CumulativeSize
+	}
+	score := 1.0
+	if discoveredPathSizes > 0 {
+		score = float64(minimumPathSizes) / float64(discoveredPathSizes)
+	}
+
+	sort.Sort(inefficientMatches)
+	return score, inefficientMatches
+}
diff --git a/filetree/tree.go b/filetree/tree.go
index a7bfddb..27703d5 100644
--- a/filetree/tree.go
+++ b/filetree/tree.go
@@ -262,30 +262,3 @@ func StackRange(trees []*FileTree, start, stop int) *FileTree {
 
 	return tree
 }
-
-// EfficiencyMap creates a map[string]int showing how often each int
-// appears in the
-func EfficiencyMap(trees []*FileTree) map[string]int {
-	result := make(map[string]int)
-	visitor := func(node *FileNode) error {
-		result[node.Path()]++
-		return nil
-	}
-	visitEvaluator := func(node *FileNode) bool {
-		return node.IsLeaf()
-	}
-	for _, tree := range trees {
-		tree.VisitDepthChildFirst(visitor, visitEvaluator)
-	}
-	return result
-}
-
-func EfficiencyScore(trees []*FileTree) float64 {
-	efficiencyMap := EfficiencyMap(trees)
-	uniquePaths := len(efficiencyMap)
-	pathAppearances := 0
-	for _, value := range efficiencyMap {
-		pathAppearances += value
-	}
-	return float64(uniquePaths) / float64(pathAppearances)
-}
diff --git a/filetree/tree_test.go b/filetree/tree_test.go
index 2711ede..8d7eec7 100644
--- a/filetree/tree_test.go
+++ b/filetree/tree_test.go
@@ -554,7 +554,7 @@ func TestEfficiencyScore(t *testing.T) {
 		trees[ix] = tree
 	}
 	expected := 2.0 / 6.0
-	actual := EfficiencyScore(trees)
+	actual, _ := Efficiency(trees)
 	if math.Abs(expected-actual) > 0.0001 {
 		t.Fatalf("Expected %f but got %f", expected, actual)
 	}
@@ -567,7 +567,7 @@ func TestEfficiencyScore(t *testing.T) {
 		trees[ix] = tree
 	}
 	expected = 1.0
-	actual = EfficiencyScore(trees)
+	actual, _ = Efficiency(trees)
 	if math.Abs(expected-actual) > 0.0001 {
 		t.Fatalf("Expected %f but got %f", expected, actual)
 	}
diff --git a/image/image.go b/image/image.go
index b923433..9b2d80f 100644
--- a/image/image.go
+++ b/image/image.go
@@ -17,10 +17,6 @@ import (
 	"golang.org/x/net/context"
 )
 
-const (
-	LayerFormat = "%-25s %5s %7s %s"
-)
-
 func check(e error) {
 	if e != nil {
 		panic(e)
diff --git a/image/layer.go b/image/layer.go
index 85d7ae2..39a88c6 100644
--- a/image/layer.go
+++ b/image/layer.go
@@ -4,10 +4,13 @@ import (
 	"github.com/wagoodman/dive/filetree"
 	"strings"
 	"fmt"
-	"strconv"
 	"github.com/dustin/go-humanize"
 )
 
+const (
+	LayerFormat = "%-25s %7s %s"
+)
+
 type Layer struct {
 	TarPath string
 	History ImageHistoryEntry
@@ -35,8 +38,6 @@ func (layer *Layer) String() string {
 
 	return fmt.Sprintf(LayerFormat,
 		layer.Id(),
-		strconv.Itoa(int(100.0*filetree.EfficiencyScore(layer.RefTrees[:layer.Index+1]))) + "%",
-		//"100%",
 		humanize.Bytes(uint64(layer.History.Size)),
 		strings.TrimPrefix(layer.History.CreatedBy, "/bin/sh -c "))
 }
diff --git a/ui/detailsview.go b/ui/detailsview.go
index b215521..61a542d 100644
--- a/ui/detailsview.go
+++ b/ui/detailsview.go
@@ -6,6 +6,9 @@ import (
 	"github.com/jroimartin/gocui"
 	"github.com/lunixbochs/vtclean"
 	"strings"
+	"github.com/wagoodman/dive/filetree"
+	"strconv"
+	"github.com/dustin/go-humanize"
 )
 
 type DetailsView struct {
@@ -13,6 +16,8 @@ type DetailsView struct {
 	gui *gocui.Gui
 	view *gocui.View
 	header *gocui.View
+	efficiency float64
+	inefficiencies filetree.EfficiencySlice
 }
 
 func NewStatisticsView(name string, gui *gocui.Gui) (detailsview *DetailsView) {
@@ -55,13 +60,32 @@ func (view *DetailsView) IsVisible() bool {
 	return true
 }
 
+// we only need to update this view upon the initial tree load
 func (view *DetailsView) Update() error {
+	layerTrees := Views.Tree.RefTrees
+	view.efficiency, view.inefficiencies = filetree.Efficiency(layerTrees[:len(layerTrees)-1])
 	return nil
 }
 
 func (view *DetailsView) Render() error {
 	currentLayer := Views.Layer.currentLayer()
 
+	var wastedSpace int64
+
+	template := "%5s %12s %-s\n"
+	inefficiencyReport := fmt.Sprintf(Formatting.Header(template), "Count", "Total Space", "Path")
+	for idx := len(view.inefficiencies)-1; idx >= 0; idx-- {
+		data := view.inefficiencies[idx]
+		if data.CumulativeSize == 0 {
+			continue
+		}
+		wastedSpace += data.CumulativeSize
+		inefficiencyReport += fmt.Sprintf(template, strconv.Itoa(len(data.Nodes)), humanize.Bytes(uint64(data.CumulativeSize)), data.Path)
+	}
+
+	effStr := fmt.Sprintf("\n%s %d %%", Formatting.Header("Image efficiency score:"), int(100.0*view.efficiency))
+	spaceStr := fmt.Sprintf("%s %s\n", Formatting.Header("Potential wasted space:"), humanize.Bytes(uint64(wastedSpace)))
+
 	view.gui.Update(func(g *gocui.Gui) error {
 		// update header
 		view.header.Clear()
@@ -71,9 +95,13 @@ func (view *DetailsView) Render() error {
 
 		// update contents
 		view.view.Clear()
-		fmt.Fprintln(view.view, Formatting.Header("Command"))
+		fmt.Fprintln(view.view, Formatting.Header("Layer Command"))
 		fmt.Fprintln(view.view, currentLayer.History.CreatedBy)
 
+		fmt.Fprintln(view.view, effStr)
+		fmt.Fprintln(view.view, spaceStr)
+
+		fmt.Fprintln(view.view, inefficiencyReport)
 		return nil
 	})
 	return nil
diff --git a/ui/layerview.go b/ui/layerview.go
index b36f6ea..1815d86 100644
--- a/ui/layerview.go
+++ b/ui/layerview.go
@@ -128,7 +128,7 @@ func (view *LayerView) Render() error {
 		view.header.Clear()
 		width, _ := g.Size()
 		headerStr := fmt.Sprintf("[%s]%s\n", title, strings.Repeat("─", width*2))
-		headerStr += fmt.Sprintf("Cmp "+image.LayerFormat, "Image ID", "%Eff.", "Size", "Command")
+		headerStr += fmt.Sprintf("Cmp "+image.LayerFormat, "Image ID", "Size", "Command")
 		fmt.Fprintln(view.header, Formatting.Header(vtclean.Clean(headerStr, false)))
 
 		// update contents
@@ -146,7 +146,7 @@ func (view *LayerView) Render() error {
 					layerId = fmt.Sprintf("%-25s", layer.History.ID)
 				}
 
-				layerStr = fmt.Sprintf(image.LayerFormat, layerId, "", humanize.Bytes(uint64(layer.History.Size)), "FROM "+layer.Id())
+				layerStr = fmt.Sprintf(image.LayerFormat, layerId, humanize.Bytes(uint64(layer.History.Size)), "FROM "+layer.Id())
 			}
 
 			compareBar := view.renderCompareBar(idx)
diff --git a/ui/ui.go b/ui/ui.go
index ca7cd04..bc4e6fa 100644
--- a/ui/ui.go
+++ b/ui/ui.go
@@ -311,6 +311,10 @@ func Run(layers []*image.Layer, refTrees []*filetree.FileTree) {
 	//g.Mouse = true
 	g.SetManagerFunc(layout)
 
+	// perform the first update and render now that all resources have been loaded
+	Update()
+	Render()
+
 	// let the default position of the cursor be the last layer
 	// Views.Layer.SetCursor(len(Views.Layer.Layers)-1)
 