Skip to content

Commit a75fe04

Browse files
committed
Add sequence and distinct sequence
1 parent f8b9a73 commit a75fe04

File tree

3 files changed

+94
-6
lines changed

3 files changed

+94
-6
lines changed

docs/functional.rst

+5-2
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,12 @@ Encodings
5353
:toctree: generated/
5454
:template: function.rst
5555

56-
57-
ngrams
56+
multiset
57+
multibind
58+
sequence
59+
distinct_sequence
5860
hash_table
61+
ngrams
5962

6063

6164
Utilities

torchhd/functional.py

+79-4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
"cosine_similarity",
2222
"dot_similarity",
2323
"multiset",
24+
"multibind",
25+
"sequence",
26+
"distinct_sequence",
2427
"ngrams",
2528
"hash_table",
2629
"map_range",
@@ -436,24 +439,52 @@ def multiset(
436439
dim=-2,
437440
keepdim=False,
438441
dtype=None,
442+
out=None,
439443
) -> Tensor:
440-
"""Returns element-wise sum of hypervectors hv
444+
"""Element-wise sum of input hypervectors
441445
442446
Args:
443447
input (Tensor): input hypervector tensor
444448
dim (int, optional): dimension over which to bundle the hypervectors. Defaults to -2.
445449
keepdim (bool, optional): whether to keep the bundled dimension. Defaults to False.
446450
dtype (torch.dtype, optional): if specified determines the type of the returned tensor, otherwise same as input.
451+
out (Tensor, optional): the output tensor.
447452
448453
Returns:
449454
Tensor: bundled hypervector
455+
"""
456+
return torch.sum(input, dim=dim, keepdim=keepdim, dtype=dtype, out=out)
457+
458+
459+
def multibind(input: Tensor, *, dim=-2, keepdim=False, dtype=None, out=None) -> Tensor:
460+
"""Element-wise multiplication of input hypervectors
461+
462+
Args:
463+
input (Tensor): input hypervector tensor
464+
dim (int, optional): dimension over which to bind the hypervectors. Defaults to -2.
465+
keepdim (bool, optional): whether to keep the bound dimension. Defaults to False.
466+
dtype (torch.dtype, optional): if specified determines the type of the returned tensor, otherwise same as input.
467+
out (Tensor, optional): the output tensor.
450468
469+
Returns:
470+
Tensor: bound hypervector
451471
"""
472+
return torch.prod(input, dim=dim, keepdim=keepdim, dtype=dtype, out=out)
473+
452474

453-
return torch.sum(input, dim=dim, keepdim=keepdim, dtype=dtype)
475+
def ngrams(input: Tensor, n=3) -> Tensor:
476+
"""Creates a hypervector containing the n-gram statistics of input
454477
478+
Arguments are of shape (*, n, d) where `*` is any dimensions including none, `n` is the
479+
number of values, and d is the dimensionality of the hypervector.
455480
456-
def ngrams(input: Tensor, n=3):
481+
Args:
482+
input (Tensor): The value hypervectors.
483+
n (int, optional): The size of each n-gram. Defaults to 3.
484+
485+
Returns:
486+
Tensor: output hypervector of shape (*, d)
487+
"""
457488
n_gram = None
458489
for i in range(0, n):
459490
if i == (n - 1):
@@ -468,8 +499,9 @@ def ngrams(input: Tensor, n=3):
468499
return multiset(n_gram)
469500

470501

471-
def hash_table(keys: Tensor, values: Tensor):
502+
def hash_table(keys: Tensor, values: Tensor) -> Tensor:
472503
"""Combines the keys and values hypervectors to create a hash table.
504+
473505
Arguments are of shape (*, v, d) where `*` is any dimensions including none, `v` is the
474506
number of key-value pairs, and d is the dimensionality of the hypervector.
475507
@@ -483,6 +515,48 @@ def hash_table(keys: Tensor, values: Tensor):
483515
return multiset(bind(keys, values))
484516

485517

518+
def sequence(input: Tensor) -> Tensor:
519+
"""Creates a bundling-based sequence
520+
521+
The first value is permuted n-1 times, the last value is permuted 0 times.
522+
523+
Args:
524+
input (Tensor): The n hypervector values of shape (*, n, d).
525+
526+
Returns:
527+
Tensor: output hypervector of shape (*, d)
528+
"""
529+
dim = -2
530+
n = input.size(dim)
531+
532+
enum = enumerate(torch.unbind(input, dim))
533+
permuted = [permute(hv, shifts=n - i - 1) for i, hv in enum]
534+
permuted = torch.stack(permuted, dim)
535+
536+
return multiset(permuted)
537+
538+
539+
def distinct_sequence(input: Tensor) -> Tensor:
540+
"""Creates a binding-based sequence
541+
542+
The first value is permuted n-1 times, the last value is permuted 0 times.
543+
544+
Args:
545+
input (Tensor): The n hypervector values of shape (*, n, d).
546+
547+
Returns:
548+
Tensor: output hypervector of shape (*, d)
549+
"""
550+
dim = -2
551+
n = input.size(dim)
552+
553+
enum = enumerate(torch.unbind(input, dim))
554+
permuted = [permute(hv, shifts=n - i - 1) for i, hv in enum]
555+
permuted = torch.stack(permuted, dim)
556+
557+
return multibind(permuted)
558+
559+
486560
def map_range(
487561
input: Tensor,
488562
in_min: float,
@@ -554,6 +628,7 @@ def cleanup(input: Tensor, memory: Tensor, threshold=0.0) -> Tensor:
554628
Args:
555629
input (Tensor): The hypervector to cleanup
556630
memory (Tensor): The `n` hypervectors in memory of shape (n, d)
631+
threshold (float, optional): minimal similarity between input and any hypervector in memory. Defaults to 0.0.
557632
558633
Returns:
559634
Tensor: output tensor

torchhd/structures.py

+10
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,11 @@ def __len__(self) -> int:
215215
def clear(self) -> None:
216216
self.value.fill_(0.0)
217217

218+
@classmethod
219+
def from_tensor(cls, input: Tensor):
220+
value = functional.sequence(input)
221+
return cls(value, size=input.size(-2))
222+
218223

219224
class DistinctSequence:
220225
@overload
@@ -267,6 +272,11 @@ def __len__(self) -> int:
267272
def clear(self) -> None:
268273
self.value.fill_(0.0)
269274

275+
@classmethod
276+
def from_tensor(cls, input: Tensor):
277+
value = functional.distinct_sequence(input)
278+
return cls(value, size=input.size(-2))
279+
270280

271281
class Graph:
272282
def __init__(self, dimensions, directed=False, device=None, dtype=None):

0 commit comments

Comments
 (0)