Size based efficiency (#24)

This commit is contained in:
Alex Goodman 2018-10-14 10:57:59 -04:00 committed by GitHub
parent 85fa13501d
commit 18b405db72
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 121 additions and 39 deletions

80
filetree/efficiency.go Normal file
View File

@ -0,0 +1,80 @@
package filetree
import (
"sort"
)
// EfficiencySlice is a list of per-path efficiency records. Its Len, Swap and
// Less methods order it by ascending CumulativeSize.
type EfficiencySlice []*EfficiencyData
// EfficiencyData accumulates what Efficiency learns about a single path
// across all of the visited trees.
type EfficiencyData struct {
// Path is the node path this record describes.
Path string
// Nodes holds every visited leaf node found at Path, in visit order.
Nodes []*FileNode
// CumulativeSize is the sum of the tar header sizes of all nodes in Nodes.
CumulativeSize int64
// minDiscoveredSize is the smallest tar header size seen for Path; it is
// initialised to -1 until the first node is recorded.
minDiscoveredSize int64
}
// Len reports how many records the slice holds (sort.Interface).
func (d EfficiencySlice) Len() int {
	count := len(d)
	return count
}
// Swap exchanges the records at positions i and j (sort.Interface).
func (d EfficiencySlice) Swap(i, j int) {
	held := d[i]
	d[i] = d[j]
	d[j] = held
}
// Less orders records by ascending CumulativeSize (sort.Interface).
func (d EfficiencySlice) Less(i, j int) bool {
	left, right := d[i], d[j]
	return left.CumulativeSize < right.CumulativeSize
}
// Efficiency scores how efficiently the given trees use space and lists every
// path that was found in more than one place.
//
// Every leaf node of every tree is grouped by its path. For each path the tar
// header sizes of all occurrences are summed (CumulativeSize) and the smallest
// single occurrence is remembered. The score is the sum of those smallest
// sizes divided by the sum of all sizes, which means:
//  1. Files that are duplicated across layers discount the score, weighted by
//     file size.
//  2. Files that are removed discount the score, weighted by the original
//     file size (NOTE(review): this relies on a removal appearing as a smaller
//     entry at the same path; confirm against the tree builder).
//
// The returned slice contains one record per path seen at least twice, sorted
// by ascending CumulativeSize. When no bytes were discovered at all (no trees,
// or only zero-sized files) the score is 1.0 instead of the NaN that 0/0 would
// produce, since nothing can have been wasted.
func Efficiency(trees []*FileTree) (float64, EfficiencySlice) {
	efficiencyMap := make(map[string]*EfficiencyData)
	inefficientMatches := make(EfficiencySlice, 0)

	visitor := func(node *FileNode) error {
		path := node.Path()
		size := node.Data.FileInfo.TarHeader.Size

		data, ok := efficiencyMap[path]
		if !ok {
			data = &EfficiencyData{
				Path:              path,
				Nodes:             make([]*FileNode, 0),
				minDiscoveredSize: -1, // sentinel: no size recorded yet
			}
			efficiencyMap[path] = data
		}

		data.CumulativeSize += size
		if data.minDiscoveredSize < 0 || size < data.minDiscoveredSize {
			data.minDiscoveredSize = size
		}
		data.Nodes = append(data.Nodes, node)

		// Record the path exactly once, at the moment it first becomes a
		// duplicate; later occurrences update the same record via the pointer.
		if len(data.Nodes) == 2 {
			inefficientMatches = append(inefficientMatches, data)
		}
		return nil
	}

	// Only leaf nodes are scored.
	visitEvaluator := func(node *FileNode) bool {
		return node.IsLeaf()
	}

	for _, tree := range trees {
		tree.VisitDepthChildFirst(visitor, visitEvaluator)
	}

	// calculate the score
	var minimumPathSizes int64
	var discoveredPathSizes int64
	for _, value := range efficiencyMap {
		minimumPathSizes += value.minDiscoveredSize
		discoveredPathSizes += value.CumulativeSize
	}

	// Guard the division: with zero discovered bytes the ratio is undefined,
	// and callers convert the score to an integer percentage.
	score := 1.0
	if discoveredPathSizes != 0 {
		score = float64(minimumPathSizes) / float64(discoveredPathSizes)
	}

	sort.Sort(inefficientMatches)

	return score, inefficientMatches
}

View File

@ -262,30 +262,3 @@ func StackRange(trees []*FileTree, start, stop int) *FileTree {
return tree
}
// EfficiencyMap builds a map from node path to the number of times that path
// appears as a leaf node across the given trees.
func EfficiencyMap(trees []*FileTree) map[string]int {
	counts := make(map[string]int)

	// Bump the counter for each visited node's path.
	countPath := func(node *FileNode) error {
		counts[node.Path()] += 1
		return nil
	}

	// Restrict the walk to leaf nodes.
	leavesOnly := func(node *FileNode) bool {
		return node.IsLeaf()
	}

	for idx := range trees {
		trees[idx].VisitDepthChildFirst(countPath, leavesOnly)
	}
	return counts
}
// EfficiencyScore returns the ratio of unique leaf paths to total leaf path
// appearances across the given trees, as counted by EfficiencyMap. A value of
// 1.0 means no path appears more than once. When there are no leaf paths at
// all the result is 1.0 rather than the NaN that 0/0 would produce.
func EfficiencyScore(trees []*FileTree) float64 {
	efficiencyMap := EfficiencyMap(trees)

	pathAppearances := 0
	for _, count := range efficiencyMap {
		pathAppearances += count
	}

	// Nothing was visited, so nothing can be duplicated; avoid dividing by zero.
	if pathAppearances == 0 {
		return 1.0
	}
	return float64(len(efficiencyMap)) / float64(pathAppearances)
}

View File

@ -554,7 +554,7 @@ func TestEfficiencyScore(t *testing.T) {
trees[ix] = tree
}
expected := 2.0 / 6.0
actual := EfficiencyScore(trees)
actual := CalculateEfficiency(trees)
if math.Abs(expected-actual) > 0.0001 {
t.Fatalf("Expected %f but got %f", expected, actual)
}
@ -567,7 +567,7 @@ func TestEfficiencyScore(t *testing.T) {
trees[ix] = tree
}
expected = 1.0
actual = EfficiencyScore(trees)
actual = CalculateEfficiency(trees)
if math.Abs(expected-actual) > 0.0001 {
t.Fatalf("Expected %f but got %f", expected, actual)
}

View File

@ -17,10 +17,6 @@ import (
"golang.org/x/net/context"
)
const (
// LayerFormat is the fmt layout for one layer row with four columns: a
// 25-wide left-aligned ID, a 5-wide efficiency column, a 7-wide size column,
// then the command.
LayerFormat = "%-25s %5s %7s %s"
)
func check(e error) {
if e != nil {
panic(e)

View File

@ -4,10 +4,13 @@ import (
"github.com/wagoodman/dive/filetree"
"strings"
"fmt"
"strconv"
"github.com/dustin/go-humanize"
)
const (
// LayerFormat is the fmt layout for one layer row with three columns: a
// 25-wide left-aligned ID, a 7-wide right-aligned size, then the command.
LayerFormat = "%-25s %7s %s"
)
type Layer struct {
TarPath string
History ImageHistoryEntry
@ -35,8 +38,6 @@ func (layer *Layer) String() string {
return fmt.Sprintf(LayerFormat,
layer.Id(),
strconv.Itoa(int(100.0*filetree.EfficiencyScore(layer.RefTrees[:layer.Index+1]))) + "%",
//"100%",
humanize.Bytes(uint64(layer.History.Size)),
strings.TrimPrefix(layer.History.CreatedBy, "/bin/sh -c "))
}

View File

@ -6,6 +6,9 @@ import (
"github.com/jroimartin/gocui"
"github.com/lunixbochs/vtclean"
"strings"
"github.com/wagoodman/dive/filetree"
"strconv"
"github.com/dustin/go-humanize"
)
type DetailsView struct {
@ -13,6 +16,8 @@ type DetailsView struct {
gui *gocui.Gui
view *gocui.View
header *gocui.View
efficiency float64
inefficiencies filetree.EfficiencySlice
}
func NewStatisticsView(name string, gui *gocui.Gui) (detailsview *DetailsView) {
@ -55,13 +60,32 @@ func (view *DetailsView) IsVisible() bool {
return true
}
// Update recomputes the cached efficiency score and the list of inefficient
// paths from the reference trees held by the tree view, leaving out the final
// tree. It only needs to run once, after the initial tree load.
// NOTE(review): the slice expression panics if RefTrees is empty; confirm the
// caller always loads at least one tree first.
func (view *DetailsView) Update() error {
	trees := Views.Tree.RefTrees
	lastIdx := len(trees) - 1

	score, matches := filetree.Efficiency(trees[:lastIdx])
	view.efficiency = score
	view.inefficiencies = matches
	return nil
}
func (view *DetailsView) Render() error {
currentLayer := Views.Layer.currentLayer()
var wastedSpace int64
template := "%5s %12s %-s\n"
inefficiencyReport := fmt.Sprintf(Formatting.Header(template), "Count", "Total Space", "Path")
// Walk the inefficiencies from last to first index. The loop bound must
// include index 0; stopping at idx > 0 dropped the first entry and reported
// nothing at all when only one path was inefficient.
for idx := len(view.inefficiencies) - 1; idx >= 0; idx-- {
	data := view.inefficiencies[idx]
	// Zero-byte entries waste no space, so keep them out of the report.
	if data.CumulativeSize == 0 {
		continue
	}
	wastedSpace += data.CumulativeSize
	inefficiencyReport += fmt.Sprintf(template, strconv.Itoa(len(data.Nodes)), humanize.Bytes(uint64(data.CumulativeSize)), data.Path)
}
effStr := fmt.Sprintf("\n%s %d %%", Formatting.Header("Image efficiency score:"), int(100.0*view.efficiency))
spaceStr := fmt.Sprintf("%s %s\n", Formatting.Header("Potential wasted space:"), humanize.Bytes(uint64(wastedSpace)))
view.gui.Update(func(g *gocui.Gui) error {
// update header
view.header.Clear()
@ -71,9 +95,13 @@ func (view *DetailsView) Render() error {
// update contents
view.view.Clear()
fmt.Fprintln(view.view, Formatting.Header("Command"))
fmt.Fprintln(view.view, Formatting.Header("Layer Command"))
fmt.Fprintln(view.view, currentLayer.History.CreatedBy)
fmt.Fprintln(view.view, effStr)
fmt.Fprintln(view.view, spaceStr)
fmt.Fprintln(view.view, inefficiencyReport)
return nil
})
return nil

View File

@ -128,7 +128,7 @@ func (view *LayerView) Render() error {
view.header.Clear()
width, _ := g.Size()
headerStr := fmt.Sprintf("[%s]%s\n", title, strings.Repeat("─", width*2))
headerStr += fmt.Sprintf("Cmp "+image.LayerFormat, "Image ID", "%Eff.", "Size", "Command")
headerStr += fmt.Sprintf("Cmp "+image.LayerFormat, "Image ID", "Size", "Command")
fmt.Fprintln(view.header, Formatting.Header(vtclean.Clean(headerStr, false)))
// update contents
@ -146,7 +146,7 @@ func (view *LayerView) Render() error {
layerId = fmt.Sprintf("%-25s", layer.History.ID)
}
layerStr = fmt.Sprintf(image.LayerFormat, layerId, "", humanize.Bytes(uint64(layer.History.Size)), "FROM "+layer.Id())
layerStr = fmt.Sprintf(image.LayerFormat, layerId, humanize.Bytes(uint64(layer.History.Size)), "FROM "+layer.Id())
}
compareBar := view.renderCompareBar(idx)

View File

@ -311,6 +311,10 @@ func Run(layers []*image.Layer, refTrees []*filetree.FileTree) {
//g.Mouse = true
g.SetManagerFunc(layout)
// perform the first update and render now that all resources have been loaded
Update()
Render()
// let the default position of the cursor be the last layer
// Views.Layer.SetCursor(len(Views.Layer.Layers)-1)