Skip to content

Commit

Permalink
feat: change v2 piece multihashes to enable arbitrarily sized data
Browse files Browse the repository at this point in the history
  • Loading branch information
aschmahmann committed Aug 25, 2023
1 parent 67b99e5 commit ec6db9a
Show file tree
Hide file tree
Showing 4 changed files with 256 additions and 87 deletions.
103 changes: 82 additions & 21 deletions commcid.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,45 @@ func Fr32PaddedSizeToV1TreeHeight(size uint64) uint8 {
// tree before any FR32 padding is applied. Because pieces are only defined on binary trees of FR32 encoded data if the
// size is not a power of 2 after the FR32 padding is applied it will be rounded up to the next one under the assumption
// that the rest of the tree will be padded out (e.g. with zeros)
func UnpaddedSizeToV1TreeHeight(size uint64) uint8 {
func UnpaddedSizeToV1TreeHeight(size uint64) (uint8, error) {
if size*128 < size {
return 0, fmt.Errorf("unsupported size: too big")
}

paddedSize := size * 128 / 127
if paddedSize*127 != size*128 {
paddedSize += 1
}

return Fr32PaddedSizeToV1TreeHeight(paddedSize)
return Fr32PaddedSizeToV1TreeHeight(paddedSize), nil
}

// UnpaddedSizeToV1TreeHeightAndPadding calculates the height of the piece tree given the data that's meant to be
// encoded in the tree before any FR32 padding is applied. Because pieces are only defined on binary trees of FR32
// encoded data if the size is not a power of 2 after the FR32 padding is applied it will be rounded up to the next one
// under the assumption that the rest of the tree will be padded out (e.g. with zeros). The amount of data padding that
// is needed to be applied is returned alongside the tree height.
func UnpaddedSizeToV1TreeHeightAndPadding(dataSize uint64) (uint8, uint64, error) {
if dataSize*128 < dataSize {
return 0, 0, fmt.Errorf("unsupported size: too big")
}

if dataSize < 127 {
return 0, 0, fmt.Errorf("unsupported size: too small")
}

fr32DataSize := dataSize * 128 / 127
// If the FR32 padding doesn't fill an exact number of bytes add up to 1 more byte of zeros to round it out
if fr32DataSize*127 != dataSize*128 {
fr32DataSize += 1
}

treeHeight := Fr32PaddedSizeToV1TreeHeight(fr32DataSize)
paddedFr32DataSize := uint64(32) << treeHeight
paddedDataSize := paddedFr32DataSize / 128 * 127
padding := paddedDataSize - dataSize

return treeHeight, padding, nil
}

// DataCommitmentV1ToPieceMhCID converts a raw data commitment and the height of the commitment tree
Expand All @@ -128,17 +160,27 @@ func UnpaddedSizeToV1TreeHeight(size uint64) uint8 {
// - hash type: multihash.SHA2_256_TRUNC254_PADDED_BINARY_TREE
//
// The helpers UnpaddedSizeToV1TreeHeight and Fr32PaddedSizeToV1TreeHeight may help in computing tree height
func DataCommitmentV1ToPieceMhCID(commD []byte, height uint8) (cid.Cid, error) {
func DataCommitmentV1ToPieceMhCID(commD []byte, unpaddedDataSize uint64) (cid.Cid, error) {
if len(commD) != 32 {
return cid.Undef, fmt.Errorf("commitments must be 32 bytes long")
}

if height < 2 {
return cid.Undef, fmt.Errorf("tree height must be at least 2, but was %d", height)
if unpaddedDataSize < 127 {
return cid.Undef, fmt.Errorf("unpadded data size must be at least 127, but was %d", unpaddedDataSize)
}

height, padding, err := UnpaddedSizeToV1TreeHeightAndPadding(unpaddedDataSize)
if err != nil {
return cid.Undef, err
}

if padding > varint.MaxValueUvarint63 {
return cid.Undef, fmt.Errorf("padded data size must be less than 2^63-1, but was %d", padding)
}

mh := FR32_SHA256_TRUNC254_PADDED_BINARY_TREE_CODE
digestSize := len(commD) + 1
paddingSize := varint.UvarintSize(padding)
digestSize := len(commD) + 1 + paddingSize

mhBuf := make(
[]byte,
Expand All @@ -149,6 +191,7 @@ func DataCommitmentV1ToPieceMhCID(commD []byte, height uint8) (cid.Cid, error) {
pos += varint.PutUvarint(mhBuf[pos:], uint64(digestSize))
mhBuf[pos] = height
pos++
pos += varint.PutUvarint(mhBuf[pos:], padding)
copy(mhBuf[pos:], commD)

return cid.NewCidV1(uint64(cid.Raw), mhBuf), nil
Expand All @@ -169,8 +212,8 @@ func CIDToDataCommitmentV1(c cid.Cid) ([]byte, error) {
return commD, nil
}

// PieceMhCIDToDataCommitmentV1 extracts the raw data commitment and tree height from the CID
func PieceMhCIDToDataCommitmentV1(c cid.Cid) ([]byte, uint8, error) {
// PieceMhCIDToDataCommitmentV1 extracts the raw data commitment and unpadded data size from the CID
func PieceMhCIDToDataCommitmentV1(c cid.Cid) ([]byte, uint64, error) {
decoded, err := multihash.Decode(c.Hash())
if err != nil {
return nil, 0, xerrors.Errorf("Error decoding data commitment hash: %w", err)
Expand All @@ -180,18 +223,36 @@ func PieceMhCIDToDataCommitmentV1(c cid.Cid) ([]byte, uint8, error) {
return nil, 0, ErrIncorrectHash
}

if decoded.Length != 33 {
return nil, 0, xerrors.Errorf("expected multihash digest to be 33 bytes, but was %d bytes", decoded.Length)
if decoded.Length < 34 {
return nil, 0, xerrors.Errorf("expected multihash digest to be at least 34 bytes, but was %d bytes", decoded.Length)
}

treeHeight := decoded.Digest[0]
paddingSize, paddingSizeVarintLen, err := varint.FromUvarint(decoded.Digest[1:])
if err != nil {
return nil, 0, xerrors.Errorf("error decoding padding size: %w", err)
}

if expectedDigestSize := 33 + paddingSizeVarintLen; decoded.Length != expectedDigestSize {
return nil, 0, xerrors.Errorf("expected multihash digest to be %d bytes, but was %d bytes", expectedDigestSize, decoded.Length)
}

paddedFr32TreeSize := uint64(32) << treeHeight
paddedTreeSize := paddedFr32TreeSize * 127 / 128
halfPaddedTreeSize := paddedTreeSize >> 1

if paddingSize >= halfPaddedTreeSize {
return nil, 0, xerrors.Errorf("size of padding (%d) must be less than half the size of the padded data (%d)", paddingSize, halfPaddedTreeSize)
}

height := decoded.Digest[0]
unpaddedDataSize := paddedTreeSize - paddingSize

if height < 2 {
return nil, 0, fmt.Errorf("tree height must be at least 2, but was %d", height)
if unpaddedDataSize < 127 {
return nil, 0, fmt.Errorf("unpadded data size must be at least 127, but was %d", unpaddedDataSize)
}

commitmentHash := decoded.Digest[1:]
return commitmentHash, height, nil
commitmentHash := decoded.Digest[1+paddingSizeVarintLen:]
return commitmentHash, unpaddedDataSize, nil
}

// ReplicaCommitmentV1ToCID converts a raw data commitment to a CID
Expand Down Expand Up @@ -243,19 +304,19 @@ func validateFilecoinCidSegments(mc FilMultiCodec, mh FilMultiHash, commX []byte
// piece multihash CID
//
// The helpers UnpaddedSizeToV1TreeHeight and Fr32PaddedSizeToV1TreeHeight may help in computing tree height
func ConvertDataCommitmentV1V1CIDtoPieceMhCID(v1PieceCid cid.Cid, treeHeight uint8) (cid.Cid, error) {
func ConvertDataCommitmentV1V1CIDtoPieceMhCID(v1PieceCid cid.Cid, unpaddedDataSize uint64) (cid.Cid, error) {
hashDigest, err := CIDToDataCommitmentV1(v1PieceCid)
if err != nil {
return cid.Undef, xerrors.Errorf("Error decoding piece CID v1: %w", err)
}

return DataCommitmentV1ToPieceMhCID(hashDigest, treeHeight)
return DataCommitmentV1ToPieceMhCID(hashDigest, unpaddedDataSize)
}

// ConvertDataCommitmentV1PieceMhCIDToV1CID takes a piece multihash CID and produces a v1 piece CID along with the
// tree height of the CommP tree
func ConvertDataCommitmentV1PieceMhCIDToV1CID(pieceMhCid cid.Cid) (cid.Cid, uint8, error) {
digest, height, err := PieceMhCIDToDataCommitmentV1(pieceMhCid)
// size of the unpadded data
func ConvertDataCommitmentV1PieceMhCIDToV1CID(pieceMhCid cid.Cid) (cid.Cid, uint64, error) {
digest, unpaddedDataSize, err := PieceMhCIDToDataCommitmentV1(pieceMhCid)
if err != nil {
return cid.Undef, 0, xerrors.Errorf("Error decoding data piece CID v2: %w", err)
}
Expand All @@ -264,7 +325,7 @@ func ConvertDataCommitmentV1PieceMhCIDToV1CID(pieceMhCid cid.Cid) (cid.Cid, uint
if err != nil {
return cid.Undef, 0, xerrors.Errorf("Could not create piece CID v1: %w", err)
}
return c, height, nil
return c, unpaddedDataSize, nil
}

// PieceCommitmentV1ToCID converts a commP to a CID
Expand Down
Loading

0 comments on commit ec6db9a

Please sign in to comment.