package filetree

import (
	"sort"

	"github.com/sirupsen/logrus"
)

// EfficiencyData represents the storage and reference statistics for a given file tree path.
type EfficiencyData struct {
	Path              string
	Nodes             []*FileNode
	CumulativeSize    int64
	minDiscoveredSize int64
}

// EfficiencySlice represents an ordered set of EfficiencyData data structures.
type EfficiencySlice []*EfficiencyData
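
// Len, Swap, and Less below implement sort.Interface, so an EfficiencySlice can be
// ordered by CumulativeSize via sort.Sort (as done at the end of Efficiency).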

// Len is required for sorting.
func (efs EfficiencySlice) Len() int {
	return len(efs)
}

// Swap operation is required for sorting.
func (efs EfficiencySlice) Swap(i, j int) {
	efs[i], efs[j] = efs[j], efs[i]
}

// Less comparison is required for sorting.
func (efs EfficiencySlice) Less(i, j int) bool {
	return efs[i].CumulativeSize < efs[j].CumulativeSize
}

// Efficiency returns the score and file set of the given set of FileTrees (layers). This is loosely based on:
// 1. Files that are duplicated across layers discount your score, weighted by file size.
// 2. Files that are removed discount your score, weighted by the original file size.
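//
// Concretely, the score computed below is sum(minDiscoveredSize) / sum(CumulativeSize)
// over all unique paths, so an image that stores every path exactly once scores 1.0.
// As an illustrative example (not taken from this file): a 10 MB file shipped
// identically in two layers adds 10 MB to the numerator but 20 MB to the denominator,
// pulling the score toward 0.5.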
func Efficiency(trees []*FileTree) (float64, EfficiencySlice) {
	efficiencyMap := make(map[string]*EfficiencyData)
	inefficientMatches := make(EfficiencySlice, 0)
	currentTree := 0
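
	// The visitor closure below runs over every leaf node of every layer; efficiencyMap
	// accumulates one EfficiencyData entry per unique path, and currentTree (updated in
	// the loop further down) tells the closure which layer is currently being walked.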
	visitor := func(node *FileNode) error {
		path := node.Path()
		if _, ok := efficiencyMap[path]; !ok {
			efficiencyMap[path] = &EfficiencyData{
				Path:              path,
				Nodes:             make([]*FileNode, 0),
				minDiscoveredSize: -1, // sentinel: no size discovered for this path yet
			}
		}
		data := efficiencyMap[path]

		// This node may have had children that were deleted; however, we won't explicitly
		// list out every child, only the top-most parent with the cumulative size. These
		// operations will need to be done on the full (stacked) tree.
		// Note: whiteout files may also represent directories, so we need to find out if
		// this was previously a file or dir.
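		// (In Docker/OCI image layers, a deletion is recorded as a ".wh.<name>" marker
		// entry; e.g. deleting /var/cache in a later layer yields /var/.wh.cache, which
		// is what IsWhiteout is assumed to detect.)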
		var sizeBytes int64

		if node.IsWhiteout() {
			sizer := func(curNode *FileNode) error {
				sizeBytes += curNode.Data.FileInfo.Size
				return nil
			}
			stackedTree, failedPaths, err := StackTreeRange(trees, 0, currentTree-1)
			if len(failedPaths) > 0 {
				for _, path := range failedPaths {
					logrus.Error(path.String())
				}
			}
			if err != nil {
				logrus.Errorf("unable to stack tree range: %+v", err)
				return err
			}

			previousTreeNode, err := stackedTree.GetNode(node.Path())
			if err != nil {
				return err
			}

			if previousTreeNode.Data.FileInfo.IsDir {
				err = previousTreeNode.VisitDepthChildFirst(sizer, nil)
				if err != nil {
					logrus.Errorf("unable to propagate whiteout dir: %+v", err)
					return err
				}
			}
		} else {
			sizeBytes = node.Data.FileInfo.Size
		}

		data.CumulativeSize += sizeBytes
		if data.minDiscoveredSize < 0 || sizeBytes < data.minDiscoveredSize {
			data.minDiscoveredSize = sizeBytes
		}
		data.Nodes = append(data.Nodes, node)

		// a path seen in more than one layer is an inefficiency candidate; record it
		// exactly once, on its second sighting
		if len(data.Nodes) == 2 {
			inefficientMatches = append(inefficientMatches, data)
		}

		return nil
	}

	visitEvaluator := func(node *FileNode) bool {
		return node.IsLeaf()
	}
	for idx, tree := range trees {
		currentTree = idx
		err := tree.VisitDepthChildFirst(visitor, visitEvaluator)
		if err != nil {
			logrus.Errorf("unable to propagate ref tree: %+v", err)
		}
	}

	// calculate the score
	var minimumPathSizes int64
	var discoveredPathSizes int64

	for _, value := range efficiencyMap {
		minimumPathSizes += value.minDiscoveredSize
		discoveredPathSizes += value.CumulativeSize
	}
	var score float64
	if discoveredPathSizes == 0 {
		score = 1.0
	} else {
		score = float64(minimumPathSizes) / float64(discoveredPathSizes)
	}

	sort.Sort(inefficientMatches)

	return score, inefficientMatches
}
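
// Example usage (an illustrative sketch; the layer trees shown here are hypothetical
// and not constructed in this file):
//
//	trees := []*FileTree{layer0, layer1} // one FileTree per image layer
//	score, matches := Efficiency(trees)
//	fmt.Printf("efficiency: %.4f\n", score) // 1.0 means no wasted bytes
//	for _, m := range matches {
//		fmt.Printf("%s appears in %d layers (%d cumulative bytes)\n",
//			m.Path, len(m.Nodes), m.CumulativeSize)
//	}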