dive-zfs/filetree/efficiency.go
2018-10-14 10:57:59 -04:00

81 lines
1.9 KiB
Go

package filetree
import (
"sort"
)
// EfficiencySlice is a list of per-path efficiency records. Together with its
// Len, Swap and Less methods it can be handed to the sort package, which
// orders the records by ascending CumulativeSize.
type EfficiencySlice []*EfficiencyData
// EfficiencyData accumulates what Efficiency has observed for a single file
// path across all of the trees it visited.
type EfficiencyData struct {
// Path is the node path this record is keyed by (the value of FileNode.Path()).
Path string
// Nodes holds every node Efficiency visited that reported this path, in visit order.
Nodes []*FileNode
// CumulativeSize is the sum of the tar header sizes of all nodes in Nodes.
CumulativeSize int64
// minDiscoveredSize is the smallest tar header size seen for this path.
// Efficiency initialises it to -1, meaning "no size recorded yet".
minDiscoveredSize int64
}
// Len reports how many records the slice holds. It is part of sort.Interface.
func (d EfficiencySlice) Len() int {
	count := len(d)
	return count
}
// Swap exchanges the records at positions i and j. It is part of sort.Interface.
func (d EfficiencySlice) Swap(i, j int) {
	held := d[i]
	d[i] = d[j]
	d[j] = held
}
// Less reports whether the record at i has a strictly smaller CumulativeSize
// than the record at j. It is part of sort.Interface.
func (d EfficiencySlice) Less(i, j int) bool {
	left, right := d[i], d[j]
	return left.CumulativeSize < right.CumulativeSize
}
// Efficiency computes a space-efficiency score for the given file trees and
// returns it together with every path that was seen more than once, sorted by
// ascending cumulative size.
//
// The scoring intent, as originally documented for this function:
//  1. Files that are duplicated across layers discount your score, weighted by file size
//  2. Files that are removed discount your score, weighted by the original file size
//
// The score is the sum of the smallest size recorded for each path divided by
// the sum of all sizes recorded for each path. When nothing of non-zero size
// was discovered (no trees, no visited nodes, or only zero-byte entries) there
// is no wasted space, so the score is 1.0 rather than the NaN that 0/0 would
// otherwise produce.
func Efficiency(trees []*FileTree) (float64, EfficiencySlice) {
	efficiencyMap := make(map[string]*EfficiencyData)
	inefficientMatches := make(EfficiencySlice, 0)

	visitor := func(node *FileNode) error {
		path := node.Path()
		size := node.Data.FileInfo.TarHeader.Size

		// Single lookup: create the record the first time a path is seen.
		data, ok := efficiencyMap[path]
		if !ok {
			data = &EfficiencyData{
				Path:              path,
				minDiscoveredSize: -1, // sentinel: no size recorded yet
			}
			efficiencyMap[path] = data
		}

		data.CumulativeSize += size
		if data.minDiscoveredSize < 0 || size < data.minDiscoveredSize {
			data.minDiscoveredSize = size
		}
		data.Nodes = append(data.Nodes, node)

		// Report a path exactly once: at the moment its second occurrence shows up.
		if len(data.Nodes) == 2 {
			inefficientMatches = append(inefficientMatches, data)
		}
		return nil
	}

	visitEvaluator := func(node *FileNode) bool {
		return node.IsLeaf()
	}

	for _, tree := range trees {
		// The visitor above never returns an error, so the traversal result
		// (if any) is not inspected here.
		tree.VisitDepthChildFirst(visitor, visitEvaluator)
	}

	// Calculate the score.
	var minimumPathSizes, discoveredPathSizes int64
	for _, data := range efficiencyMap {
		minimumPathSizes += data.minDiscoveredSize
		discoveredPathSizes += data.CumulativeSize
	}

	// Guard the division: with no discovered bytes the ratio would be 0/0 (NaN).
	score := 1.0
	if discoveredPathSizes != 0 {
		score = float64(minimumPathSizes) / float64(discoveredPathSizes)
	}

	sort.Sort(inefficientMatches)
	return score, inefficientMatches
}