package sizes
import (
	"fmt"

	"github.com/github/git-sizer/counts"
	"github.com/github/git-sizer/git"
)
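// A Size is a size value that can be expressed as a human-readable string.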
type Size interface {
	fmt.Stringer
}
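// BlobSize records the size of a blob object.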
type BlobSize struct {
	Size counts.Count32
}
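// TreeSize records size information about a tree and all of the
// objects reachable from it. The expanded counts and sizes include
// duplicated objects.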
type TreeSize struct {
	// The maximum depth of trees and blobs starting at this object
	// (not including this object).
	MaxPathDepth counts.Count32 `json:"max_path_depth"`

	// The maximum length of any path relative to this object, in
	// characters.
	MaxPathLength counts.Count32 `json:"max_path_length"`

	// The total number of trees, including duplicates.
	ExpandedTreeCount counts.Count32 `json:"expanded_tree_count"`

	// The total number of blobs, including duplicates.
	ExpandedBlobCount counts.Count32 `json:"expanded_blob_count"`

	// The total size of all blobs, including duplicates.
	ExpandedBlobSize counts.Count64 `json:"expanded_blob_size"`

	// The total number of symbolic links, including duplicates.
	ExpandedLinkCount counts.Count32 `json:"expanded_link_count"`

	// The total number of submodules referenced, including duplicates.
	ExpandedSubmoduleCount counts.Count32 `json:"expanded_submodule_count"`
}
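// Record that the object has a tree with size `s2` as a direct
// descendant, reachable via `filename`. For example, if a subtree
// named "src" has a deepest path "main.go" (depth 1, length 7), this
// object gains the path "src/main.go": depth 2 and length
// len("src")+1+7 = 11, where the +1 accounts for the path separator.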
func (s *TreeSize) addDescendent(filename string, s2 TreeSize) {
	s.MaxPathDepth.AdjustMaxIfNecessary(s2.MaxPathDepth.Plus(1))
	if s2.MaxPathLength > 0 {
		s.MaxPathLength.AdjustMaxIfNecessary(
			(counts.NewCount32(uint64(len(filename))) + 1).Plus(s2.MaxPathLength),
		)
	} else {
		s.MaxPathLength.AdjustMaxIfNecessary(counts.NewCount32(uint64(len(filename))))
	}
	s.ExpandedTreeCount.Increment(s2.ExpandedTreeCount)
	s.ExpandedBlobCount.Increment(s2.ExpandedBlobCount)
	s.ExpandedBlobSize.Increment(s2.ExpandedBlobSize)
	s.ExpandedLinkCount.Increment(s2.ExpandedLinkCount)
	s.ExpandedSubmoduleCount.Increment(s2.ExpandedSubmoduleCount)
}
// Record that the object has a blob of the specified `size` as a
// direct descendant.
func (s *TreeSize) addBlob(filename string, size BlobSize) {
	s.MaxPathDepth.AdjustMaxIfNecessary(1)
	s.MaxPathLength.AdjustMaxIfNecessary(counts.NewCount32(uint64(len(filename))))
	s.ExpandedBlobSize.Increment(counts.Count64(size.Size))
	s.ExpandedBlobCount.Increment(1)
}
// Record that the object has a link as a direct descendant.
func (s *TreeSize) addLink(filename string) {
	s.MaxPathDepth.AdjustMaxIfNecessary(1)
	s.MaxPathLength.AdjustMaxIfNecessary(counts.NewCount32(uint64(len(filename))))
	s.ExpandedLinkCount.Increment(1)
}
// Record that the object has a submodule as a direct descendant.
func (s *TreeSize) addSubmodule(filename string) {
	s.MaxPathDepth.AdjustMaxIfNecessary(1)
	s.MaxPathLength.AdjustMaxIfNecessary(counts.NewCount32(uint64(len(filename))))
	s.ExpandedSubmoduleCount.Increment(1)
}
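// CommitSize records size information about a commit and its entire
// ancestry.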
type CommitSize struct {
	// The height of the ancestor graph, including this commit.
	MaxAncestorDepth counts.Count32 `json:"max_ancestor_depth"`
}
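// Record that the commit has a parent with the specified size.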
func (s *CommitSize) addParent(s2 CommitSize) {
	s.MaxAncestorDepth.AdjustMaxIfNecessary(s2.MaxAncestorDepth)
}
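// Record that the commit refers to a tree with the specified size.
// There is currently nothing to do, because CommitSize only tracks the
// ancestor depth, which is unaffected by the tree.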
func (s *CommitSize) addTree(s2 TreeSize) {
}
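// TagSize records size information about an annotated tag.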
type TagSize struct {
	// The number of tags that have to be traversed (including this
	// one) to get to an object.
	TagDepth counts.Count32
}
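// HistorySize records the size statistics accumulated over all of the
// objects and references scanned in a repository's history.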
type HistorySize struct {
	// The total number of unique commits analyzed.
	UniqueCommitCount counts.Count32 `json:"unique_commit_count"`

	// The total size of all commits analyzed.
	UniqueCommitSize counts.Count64 `json:"unique_commit_size"`

	// The maximum size of any analyzed commit.
	MaxCommitSize counts.Count32 `json:"max_commit_size"`

	// The commit with the maximum size.
	MaxCommitSizeCommit *Path `json:"max_commit,omitempty"`

	// The maximum ancestor depth of any analyzed commit.
	MaxHistoryDepth counts.Count32 `json:"max_history_depth"`

	// The maximum number of direct parents of any analyzed commit.
	MaxParentCount counts.Count32 `json:"max_parent_count"`

	// The commit with the maximum number of direct parents.
	MaxParentCountCommit *Path `json:"max_parent_count_commit,omitempty"`

	// The total number of unique trees analyzed.
	UniqueTreeCount counts.Count32 `json:"unique_tree_count"`

	// The total size of all trees analyzed.
	UniqueTreeSize counts.Count64 `json:"unique_tree_size"`

	// The total number of tree entries in all unique trees analyzed.
	UniqueTreeEntries counts.Count64 `json:"unique_tree_entries"`

	// The maximum number of entries in a tree.
	MaxTreeEntries counts.Count32 `json:"max_tree_entries"`

	// The tree with the maximum number of entries.
	MaxTreeEntriesTree *Path `json:"max_tree_entries_tree,omitempty"`

	// The total number of unique blobs analyzed.
	UniqueBlobCount counts.Count32 `json:"unique_blob_count"`

	// The total size of all of the unique blobs analyzed.
	UniqueBlobSize counts.Count64 `json:"unique_blob_size"`

	// The maximum size of any analyzed blob.
	MaxBlobSize counts.Count32 `json:"max_blob_size"`

	// The biggest blob found.
	MaxBlobSizeBlob *Path `json:"max_blob_size_blob,omitempty"`

	// The total number of unique tag objects analyzed.
	UniqueTagCount counts.Count32 `json:"unique_tag_count"`

	// The maximum number of tags in a chain.
	MaxTagDepth counts.Count32 `json:"max_tag_depth"`

	// The tag with the maximum tag depth.
	MaxTagDepthTag *Path `json:"max_tag_depth_tag,omitempty"`

	// The number of references analyzed. Note that we don't eliminate
	// duplicates if the user passes the same reference more than
	// once.
	ReferenceCount counts.Count32 `json:"reference_count"`

	// ReferenceGroups keeps track of how many references in each
	// reference group were scanned.
	ReferenceGroups map[RefGroupSymbol]*counts.Count32 `json:"reference_groups"`

	// The maximum TreeSize in the analyzed history (where each
	// attribute is maximized separately).

	// The maximum depth of trees and blobs starting at any tree
	// (not including the tree itself).
	MaxPathDepth counts.Count32 `json:"max_path_depth"`

	// The tree with the maximum path depth.
	MaxPathDepthTree *Path `json:"max_path_depth_tree,omitempty"`

	// The maximum length of any path relative to any tree, in
	// characters.
	MaxPathLength counts.Count32 `json:"max_path_length"`

	// The tree with the maximum path length.
	MaxPathLengthTree *Path `json:"max_path_length_tree,omitempty"`

	// The maximum total number of trees, including duplicates.
	MaxExpandedTreeCount counts.Count32 `json:"max_expanded_tree_count"`

	// The tree with the maximum expanded tree count.
	MaxExpandedTreeCountTree *Path `json:"max_expanded_tree_count_tree,omitempty"`

	// The maximum total number of blobs, including duplicates.
	MaxExpandedBlobCount counts.Count32 `json:"max_expanded_blob_count"`

	// The tree with the maximum expanded blob count.
	MaxExpandedBlobCountTree *Path `json:"max_expanded_blob_count_tree,omitempty"`

	// The maximum total size of all blobs, including duplicates.
	MaxExpandedBlobSize counts.Count64 `json:"max_expanded_blob_size"`

	// The tree with the maximum expanded blob size.
	MaxExpandedBlobSizeTree *Path `json:"max_expanded_blob_size_tree,omitempty"`

	// The maximum total number of symbolic links, including duplicates.
	MaxExpandedLinkCount counts.Count32 `json:"max_expanded_link_count"`

	// The tree with the maximum expanded link count.
	MaxExpandedLinkCountTree *Path `json:"max_expanded_link_count_tree,omitempty"`

	// The maximum total number of submodules referenced, including
	// duplicates.
	MaxExpandedSubmoduleCount counts.Count32 `json:"max_expanded_submodule_count"`

	// The tree with the maximum expanded submodule count.
	MaxExpandedSubmoduleCountTree *Path `json:"max_expanded_submodule_count_tree,omitempty"`
}
// Convenience function: forget `*path` if it is non-nil and overwrite
// it with a `*Path` for the object corresponding to `(oid,
// objectType)`. This function can be used if a new largest item was
// found.
func setPath(
	pr PathResolver,
	path **Path,
	oid git.OID, objectType string) {
	if *path != nil {
		pr.ForgetPath(*path)
	}
	*path = pr.RequestPath(oid, objectType)
}
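// Record that a blob with the given `oid` and size was scanned,
// remembering its path if it is the largest blob seen so far.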
func (s *HistorySize) recordBlob(g *Graph, oid git.OID, blobSize BlobSize) {
	s.UniqueBlobCount.Increment(1)
	s.UniqueBlobSize.Increment(counts.Count64(blobSize.Size))
	if s.MaxBlobSize.AdjustMaxIfNecessary(blobSize.Size) {
		setPath(g.pathResolver, &s.MaxBlobSizeBlob, oid, "blob")
	}
}
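// Record that a tree with the given `oid` was scanned: `size` is the
// size of the tree object itself, `treeSize` its expanded size, and
// `treeEntries` its number of direct entries. Each history-wide
// maximum (and the path of the tree that attains it) is updated
// separately.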
func (s *HistorySize) recordTree(
	g *Graph, oid git.OID, treeSize TreeSize, size counts.Count32, treeEntries counts.Count32,
) {
	s.UniqueTreeCount.Increment(1)
	s.UniqueTreeSize.Increment(counts.Count64(size))
	s.UniqueTreeEntries.Increment(counts.Count64(treeEntries))
	if s.MaxTreeEntries.AdjustMaxIfNecessary(treeEntries) {
		setPath(g.pathResolver, &s.MaxTreeEntriesTree, oid, "tree")
	}

	if s.MaxPathDepth.AdjustMaxIfNecessary(treeSize.MaxPathDepth) {
		setPath(g.pathResolver, &s.MaxPathDepthTree, oid, "tree")
	}
	if s.MaxPathLength.AdjustMaxIfNecessary(treeSize.MaxPathLength) {
		setPath(g.pathResolver, &s.MaxPathLengthTree, oid, "tree")
	}
	if s.MaxExpandedTreeCount.AdjustMaxIfNecessary(treeSize.ExpandedTreeCount) {
		setPath(g.pathResolver, &s.MaxExpandedTreeCountTree, oid, "tree")
	}
	if s.MaxExpandedBlobCount.AdjustMaxIfNecessary(treeSize.ExpandedBlobCount) {
		setPath(g.pathResolver, &s.MaxExpandedBlobCountTree, oid, "tree")
	}
	if s.MaxExpandedBlobSize.AdjustMaxIfNecessary(treeSize.ExpandedBlobSize) {
		setPath(g.pathResolver, &s.MaxExpandedBlobSizeTree, oid, "tree")
	}
	if s.MaxExpandedLinkCount.AdjustMaxIfNecessary(treeSize.ExpandedLinkCount) {
		setPath(g.pathResolver, &s.MaxExpandedLinkCountTree, oid, "tree")
	}
	if s.MaxExpandedSubmoduleCount.AdjustMaxIfNecessary(treeSize.ExpandedSubmoduleCount) {
		setPath(g.pathResolver, &s.MaxExpandedSubmoduleCountTree, oid, "tree")
	}
}
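// Record that a commit with the given `oid`, object size, and number
// of direct parents was scanned, remembering the paths of the commits
// with the largest size and parent count.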
func (s *HistorySize) recordCommit(
	g *Graph, oid git.OID, commitSize CommitSize,
	size counts.Count32, parentCount counts.Count32,
) {
	s.UniqueCommitCount.Increment(1)
	s.UniqueCommitSize.Increment(counts.Count64(size))
	if s.MaxCommitSize.AdjustMaxIfPossible(size) {
		setPath(g.pathResolver, &s.MaxCommitSizeCommit, oid, "commit")
	}
	s.MaxHistoryDepth.AdjustMaxIfPossible(commitSize.MaxAncestorDepth)
	if s.MaxParentCount.AdjustMaxIfPossible(parentCount) {
		setPath(g.pathResolver, &s.MaxParentCountCommit, oid, "commit")
	}
}
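// Record that an annotated tag with the given `oid`, tag depth, and
// object size was scanned.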
func (s *HistorySize) recordTag(g *Graph, oid git.OID, tagSize TagSize, size counts.Count32) {
	s.UniqueTagCount.Increment(1)
	if s.MaxTagDepth.AdjustMaxIfNecessary(tagSize.TagDepth) {
		setPath(g.pathResolver, &s.MaxTagDepthTag, oid, "tag")
	}
}
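// Record that the reference `ref` was scanned.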
func (s *HistorySize) recordReference(g *Graph, ref git.Reference) {
	s.ReferenceCount.Increment(1)
}
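// Record that a reference belonging to the reference group `group` was
// scanned, creating the group's counter the first time the group is
// seen.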
func (s *HistorySize) recordReferenceGroup(g *Graph, group RefGroupSymbol) {
	c, ok := s.ReferenceGroups[group]
	if ok {
		c.Increment(1)
	} else {
		n := counts.Count32(1)
		s.ReferenceGroups[group] = &n
	}
}