git-sizer/sizes/sizes.go

package sizes

import (
	"fmt"

	"github.com/github/git-sizer/counts"
	"github.com/github/git-sizer/git"
)
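
// Note: the counts.Count32 and counts.Count64 types used throughout
// this file come from the counts package, as do the Increment, Plus,
// and AdjustMaxIfNecessary/AdjustMaxIfPossible helpers invoked on
// them below.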

type Size interface {
	fmt.Stringer
}

type BlobSize struct {
	Size counts.Count32
}

type TreeSize struct {
	// The maximum depth of trees and blobs starting at this object
	// (not including this object).
	MaxPathDepth counts.Count32 `json:"max_path_depth"`

	// The maximum length of any path relative to this object, in
	// characters.
	MaxPathLength counts.Count32 `json:"max_path_length"`

	// The total number of trees, including duplicates.
	ExpandedTreeCount counts.Count32 `json:"expanded_tree_count"`

	// The total number of blobs, including duplicates.
	ExpandedBlobCount counts.Count32 `json:"expanded_blob_count"`

	// The total size of all blobs, including duplicates.
	ExpandedBlobSize counts.Count64 `json:"expanded_blob_size"`

	// The total number of symbolic links, including duplicates.
	ExpandedLinkCount counts.Count32 `json:"expanded_link_count"`

	// The total number of submodules referenced, including duplicates.
	ExpandedSubmoduleCount counts.Count32 `json:"expanded_submodule_count"`
}
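
// addDescendent records that the tree has, as a direct descendant, an
// entry named `filename` whose own TreeSize is `s2`. A worked example
// of the path arithmetic below: if s2 has MaxPathLength 9 (say, from
// "README.md") and filename is "sub", the candidate for s.MaxPathLength
// is len("sub") + 1 + 9 == 13, the "+1" accounting for the "/"
// separator.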
func (s *TreeSize) addDescendent(filename string, s2 TreeSize) {
	s.MaxPathDepth.AdjustMaxIfNecessary(s2.MaxPathDepth.Plus(1))
	if s2.MaxPathLength > 0 {
		s.MaxPathLength.AdjustMaxIfNecessary(
			(counts.NewCount32(uint64(len(filename))) + 1).Plus(s2.MaxPathLength),
		)
	} else {
		s.MaxPathLength.AdjustMaxIfNecessary(counts.NewCount32(uint64(len(filename))))
	}

	s.ExpandedTreeCount.Increment(s2.ExpandedTreeCount)
	s.ExpandedBlobCount.Increment(s2.ExpandedBlobCount)
	s.ExpandedBlobSize.Increment(s2.ExpandedBlobSize)
	s.ExpandedLinkCount.Increment(s2.ExpandedLinkCount)
	s.ExpandedSubmoduleCount.Increment(s2.ExpandedSubmoduleCount)
}

// Record that the object has a blob of the specified `size` as a
// direct descendant.
func (s *TreeSize) addBlob(filename string, size BlobSize) {
	s.MaxPathDepth.AdjustMaxIfNecessary(1)
	s.MaxPathLength.AdjustMaxIfNecessary(counts.NewCount32(uint64(len(filename))))
	s.ExpandedBlobSize.Increment(counts.Count64(size.Size))
	s.ExpandedBlobCount.Increment(1)
}

// Record that the object has a link as a direct descendant.
func (s *TreeSize) addLink(filename string) {
	s.MaxPathDepth.AdjustMaxIfNecessary(1)
	s.MaxPathLength.AdjustMaxIfNecessary(counts.NewCount32(uint64(len(filename))))
	s.ExpandedLinkCount.Increment(1)
}

// Record that the object has a submodule as a direct descendant.
func (s *TreeSize) addSubmodule(filename string) {
	s.MaxPathDepth.AdjustMaxIfNecessary(1)
	s.MaxPathLength.AdjustMaxIfNecessary(counts.NewCount32(uint64(len(filename))))
	s.ExpandedSubmoduleCount.Increment(1)
}

type CommitSize struct {
	// The height of the ancestor graph, including this commit.
	MaxAncestorDepth counts.Count32 `json:"max_ancestor_depth"`
}

func (s *CommitSize) addParent(s2 CommitSize) {
	s.MaxAncestorDepth.AdjustMaxIfNecessary(s2.MaxAncestorDepth)
}

func (s *CommitSize) addTree(s2 TreeSize) {
}

type TagSize struct {
	// The number of tags that have to be traversed (including this
	// one) to get to an object.
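	// For example, an annotated tag that points directly at a commit
	// has TagDepth 1, and a tag that points at that tag has TagDepth 2.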
	TagDepth counts.Count32
}

type HistorySize struct {
	// The total number of unique commits analyzed.
	UniqueCommitCount counts.Count32 `json:"unique_commit_count"`

	// The total size of all commits analyzed.
	UniqueCommitSize counts.Count64 `json:"unique_commit_size"`

	// The maximum size of any analyzed commit.
	MaxCommitSize counts.Count32 `json:"max_commit_size"`

	// The commit with the maximum size.
	MaxCommitSizeCommit *Path `json:"max_commit,omitempty"`

	// The maximum ancestor depth of any analyzed commit.
	MaxHistoryDepth counts.Count32 `json:"max_history_depth"`

	// The maximum number of direct parents of any analyzed commit.
	MaxParentCount counts.Count32 `json:"max_parent_count"`

	// The commit with the maximum number of direct parents.
	MaxParentCountCommit *Path `json:"max_parent_count_commit,omitempty"`

	// The total number of unique trees analyzed.
	UniqueTreeCount counts.Count32 `json:"unique_tree_count"`

	// The total size of all trees analyzed.
	UniqueTreeSize counts.Count64 `json:"unique_tree_size"`

	// The total number of tree entries in all unique trees analyzed.
	UniqueTreeEntries counts.Count64 `json:"unique_tree_entries"`

	// The maximum number of entries in a tree.
	MaxTreeEntries counts.Count32 `json:"max_tree_entries"`

	// The tree with the maximum number of entries.
	MaxTreeEntriesTree *Path `json:"max_tree_entries_tree,omitempty"`

	// The total number of unique blobs analyzed.
	UniqueBlobCount counts.Count32 `json:"unique_blob_count"`

	// The total size of all of the unique blobs analyzed.
	UniqueBlobSize counts.Count64 `json:"unique_blob_size"`

	// The maximum size of any analyzed blob.
	MaxBlobSize counts.Count32 `json:"max_blob_size"`

	// The biggest blob found.
	MaxBlobSizeBlob *Path `json:"max_blob_size_blob,omitempty"`

	// The total number of unique tag objects analyzed.
	UniqueTagCount counts.Count32 `json:"unique_tag_count"`

	// The maximum number of tags in a chain.
	MaxTagDepth counts.Count32 `json:"max_tag_depth"`

	// The tag with the maximum tag depth.
	MaxTagDepthTag *Path `json:"max_tag_depth_tag,omitempty"`

	// The number of references analyzed. Note that we don't eliminate
	// duplicates if the user passes the same reference more than
	// once.
	ReferenceCount counts.Count32 `json:"reference_count"`

	// ReferenceGroups keeps track of how many references in each
	// reference group were scanned.
	ReferenceGroups map[RefGroupSymbol]*counts.Count32 `json:"reference_groups"`

	// The maximum TreeSize in the analyzed history (where each
	// attribute is maximized separately).

	// The maximum depth of trees and blobs starting at this object
	// (not including this object).
	MaxPathDepth counts.Count32 `json:"max_path_depth"`

	// The tree with the maximum path depth.
	MaxPathDepthTree *Path `json:"max_path_depth_tree,omitempty"`

	// The maximum length of any path relative to this object, in
	// characters.
	MaxPathLength counts.Count32 `json:"max_path_length"`

	// The tree with the maximum path length.
	MaxPathLengthTree *Path `json:"max_path_length_tree,omitempty"`

	// The total number of trees, including duplicates.
	MaxExpandedTreeCount counts.Count32 `json:"max_expanded_tree_count"`

	// The tree with the maximum expanded tree count.
	MaxExpandedTreeCountTree *Path `json:"max_expanded_tree_count_tree,omitempty"`

	// The total number of blobs, including duplicates.
	MaxExpandedBlobCount counts.Count32 `json:"max_expanded_blob_count"`

	// The tree with the maximum expanded blob count.
	MaxExpandedBlobCountTree *Path `json:"max_expanded_blob_count_tree,omitempty"`

	// The total size of all blobs, including duplicates.
	MaxExpandedBlobSize counts.Count64 `json:"max_expanded_blob_size"`

	// The tree with the maximum expanded blob size.
	MaxExpandedBlobSizeTree *Path `json:"max_expanded_blob_size_tree,omitempty"`

	// The total number of symbolic links, including duplicates.
	MaxExpandedLinkCount counts.Count32 `json:"max_expanded_link_count"`

	// The tree with the maximum expanded link count.
	MaxExpandedLinkCountTree *Path `json:"max_expanded_link_count_tree,omitempty"`

	// The total number of submodules referenced, including duplicates.
	MaxExpandedSubmoduleCount counts.Count32 `json:"max_expanded_submodule_count"`

	// The tree with the maximum expanded submodule count.
	MaxExpandedSubmoduleCountTree *Path `json:"max_expanded_submodule_count_tree,omitempty"`
}
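
// Because each Max* attribute above is maximized separately,
// MaxPathDepthTree and MaxPathLengthTree (for example) need not refer
// to the same tree.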

// Convenience function: forget `*path` if it is non-nil and overwrite
// it with a `*Path` for the object corresponding to `(oid,
// objectType)`. This function can be used if a new largest item was
// found.
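// For example, recordBlob below uses it as:
//
//	if s.MaxBlobSize.AdjustMaxIfNecessary(blobSize.Size) {
//		setPath(g.pathResolver, &s.MaxBlobSizeBlob, oid, "blob")
//	}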
func setPath(
	pr PathResolver,
	path **Path,
	oid git.OID, objectType string) {
	if *path != nil {
		pr.ForgetPath(*path)
	}
	*path = pr.RequestPath(oid, objectType)
}

func (s *HistorySize) recordBlob(g *Graph, oid git.OID, blobSize BlobSize) {
	s.UniqueBlobCount.Increment(1)
	s.UniqueBlobSize.Increment(counts.Count64(blobSize.Size))
	if s.MaxBlobSize.AdjustMaxIfNecessary(blobSize.Size) {
		setPath(g.pathResolver, &s.MaxBlobSizeBlob, oid, "blob")
	}
}

func (s *HistorySize) recordTree(
	g *Graph, oid git.OID, treeSize TreeSize, size counts.Count32, treeEntries counts.Count32,
) {
	s.UniqueTreeCount.Increment(1)
	s.UniqueTreeSize.Increment(counts.Count64(size))
	s.UniqueTreeEntries.Increment(counts.Count64(treeEntries))
	if s.MaxTreeEntries.AdjustMaxIfNecessary(treeEntries) {
		setPath(g.pathResolver, &s.MaxTreeEntriesTree, oid, "tree")
	}
	if s.MaxPathDepth.AdjustMaxIfNecessary(treeSize.MaxPathDepth) {
		setPath(g.pathResolver, &s.MaxPathDepthTree, oid, "tree")
	}
	if s.MaxPathLength.AdjustMaxIfNecessary(treeSize.MaxPathLength) {
		setPath(g.pathResolver, &s.MaxPathLengthTree, oid, "tree")
	}
	if s.MaxExpandedTreeCount.AdjustMaxIfNecessary(treeSize.ExpandedTreeCount) {
		setPath(g.pathResolver, &s.MaxExpandedTreeCountTree, oid, "tree")
	}
	if s.MaxExpandedBlobCount.AdjustMaxIfNecessary(treeSize.ExpandedBlobCount) {
		setPath(g.pathResolver, &s.MaxExpandedBlobCountTree, oid, "tree")
	}
	if s.MaxExpandedBlobSize.AdjustMaxIfNecessary(treeSize.ExpandedBlobSize) {
		setPath(g.pathResolver, &s.MaxExpandedBlobSizeTree, oid, "tree")
	}
	if s.MaxExpandedLinkCount.AdjustMaxIfNecessary(treeSize.ExpandedLinkCount) {
		setPath(g.pathResolver, &s.MaxExpandedLinkCountTree, oid, "tree")
	}
	if s.MaxExpandedSubmoduleCount.AdjustMaxIfNecessary(treeSize.ExpandedSubmoduleCount) {
		setPath(g.pathResolver, &s.MaxExpandedSubmoduleCountTree, oid, "tree")
	}
}

func (s *HistorySize) recordCommit(
	g *Graph, oid git.OID, commitSize CommitSize,
	size counts.Count32, parentCount counts.Count32,
) {
	s.UniqueCommitCount.Increment(1)
	s.UniqueCommitSize.Increment(counts.Count64(size))
	if s.MaxCommitSize.AdjustMaxIfPossible(size) {
		setPath(g.pathResolver, &s.MaxCommitSizeCommit, oid, "commit")
	}
	s.MaxHistoryDepth.AdjustMaxIfPossible(commitSize.MaxAncestorDepth)
	if s.MaxParentCount.AdjustMaxIfPossible(parentCount) {
		setPath(g.pathResolver, &s.MaxParentCountCommit, oid, "commit")
	}
}

func (s *HistorySize) recordTag(g *Graph, oid git.OID, tagSize TagSize, size counts.Count32) {
	s.UniqueTagCount.Increment(1)
	if s.MaxTagDepth.AdjustMaxIfNecessary(tagSize.TagDepth) {
		setPath(g.pathResolver, &s.MaxTagDepthTag, oid, "tag")
	}
}

func (s *HistorySize) recordReference(g *Graph, ref git.Reference) {
	s.ReferenceCount.Increment(1)
}
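
// recordReferenceGroup counts one more scanned reference toward the
// given group. For example, after three references belonging to a
// (hypothetical) group symbol "branches" have been recorded,
// *s.ReferenceGroups["branches"] == 3.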
func (s *HistorySize) recordReferenceGroup(g *Graph, group RefGroupSymbol) {
	c, ok := s.ReferenceGroups[group]
	if ok {
		c.Increment(1)
	} else {
		n := counts.Count32(1)
		s.ReferenceGroups[group] = &n
	}
}