fundamentals

import torch

torch.__version__
'2.4.0.dev20240602'

1 Tensor

#TODO: read the docs for torch.Tensor

1.1 scalar

scalar = torch.tensor(9)
scalar
tensor(9)
scalar.ndim, scalar.ndimension()
(0, 0)
scalar.item()
9

1.2 vector

vector = torch.tensor([9, 9, 9])
vector
tensor([9, 9, 9])
vector.ndim
1
vector.shape
torch.Size([3])

1.3 matrix

matrix = torch.tensor([[9, 9, 9],
                       [9, 9, 9]])
matrix
tensor([[9, 9, 9],
        [9, 9, 9]])
matrix.ndim
2
matrix.shape
torch.Size([2, 3])
matrix.size()
torch.Size([2, 3])

1.4 tensor

tensor = torch.tensor(
    [  # dim 0: this pair of brackets holds two matrices
        [  # dim 1: this pair of brackets holds three vectors
            [  # dim 2: this pair of brackets holds three scalars
                1,
                2,
                3,
            ],
            [1, 2, 3],
            [1, 2, 3]
        ],
        [  # dim 1
            [1, 2, 3],
            [1, 2, 3],
            [1, 2, 3]
        ]
    ]
)
tensor
tensor([[[1, 2, 3],
         [1, 2, 3],
         [1, 2, 3]],

        [[1, 2, 3],
         [1, 2, 3],
         [1, 2, 3]]])
tensor.ndim
3
tensor.shape
torch.Size([2, 3, 3])

2 random tensor

random_tensor = torch.rand(size=(3, 4))
random_tensor, random_tensor.dtype
(tensor([[0.9508, 0.0344, 0.1949, 0.2121],
         [0.6301, 0.8800, 0.0905, 0.8551],
         [0.1719, 0.8458, 0.5306, 0.7635]]),
 torch.float32)
random_image_size_tensor = torch.rand(size=(224, 224, 3))
random_image_size_tensor.shape, random_image_size_tensor.ndim
(torch.Size([224, 224, 3]), 3)
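
torch.rand() draws from a uniform distribution over [0, 1). Related creation functions exist for other distributions; a quick sketch (torch.randn for standard-normal values, torch.randint for integers in a half-open range):

normal_tensor = torch.randn(size=(3, 4))  # standard normal: mean 0, std 1
int_tensor = torch.randint(low=0, high=10, size=(3, 4))  # integers drawn from [0, 10)
normal_tensor.dtype, int_tensor.dtype  # (torch.float32, torch.int64)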

3 zeros and ones

zeros = torch.zeros(size=(3, 4))
zeros, zeros.dtype
(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)
ones = torch.ones(size=(3, 4))
ones, ones.dtype
(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)
# Use torch.arange(); torch.range() is deprecated
zero_to_ten_deprecated = torch.range(0, 10)

zero_to_ten = torch.arange(start=0, end=10, step=1)
zero_to_ten
/var/folders/jw/r2366h9x7y99tvnxp8fzcrdh0000gn/T/ipykernel_18720/2515304713.py:2: UserWarning: torch.range is deprecated and will be removed in a future release because its behavior is inconsistent with Python's range builtin. Instead, use torch.arange, which produces values in [start, end).
  zero_to_ten_deprecated = torch.range(0, 10)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
ten_zeros = torch.zeros_like(input=zero_to_ten)
ten_zeros
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

4 tensor data type

float_32_tensor = torch.tensor([1, 2, 3],
                               dtype=None,  # None infers the dtype from the data (int64 for Python ints)
                               device=None,  # default is cpu
                               requires_grad=False,
                               # if True, operations on this tensor are recorded for autograd
                               )
float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device, float_32_tensor.requires_grad
(torch.Size([3]), torch.int64, device(type='cpu'), False)
float_16_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=torch.float16)  # torch.half would also work

float_16_tensor.dtype
torch.float16
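
Mixing dtypes in a single operation triggers type promotion rather than an error; a minimal sketch (float16 multiplied by float32 promotes to float32):

f16 = torch.tensor([3.0, 6.0, 9.0], dtype=torch.float16)
f32 = torch.tensor([3.0, 6.0, 9.0])  # Python floats default to float32
(f16 * f32).dtype  # torch.float32, the wider dtype wins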

5 tensor operations

These operations are often a wonderful dance between:

  • Addition
  • Subtraction
  • Multiplication (element-wise)
  • Division
  • Matrix multiplication

5.1 basic operations

tensor = torch.tensor([1, 2, 3])
# These operations don't modify the tensor in place; reassign the result if you want to keep it
tensor + 10, torch.add(tensor, 10)
(tensor([11, 12, 13]), tensor([11, 12, 13]))
tensor - 10, torch.sub(tensor, 10)
(tensor([-9, -8, -7]), tensor([-9, -8, -7]))
tensor * 10, torch.multiply(tensor, 10), torch.mul(tensor, 10)
(tensor([10, 20, 30]), tensor([10, 20, 30]), tensor([10, 20, 30]))
tensor / 10, torch.divide(tensor, 10), torch.div(tensor, 10)
(tensor([0.1000, 0.2000, 0.3000]),
 tensor([0.1000, 0.2000, 0.3000]),
 tensor([0.1000, 0.2000, 0.3000]))
# Element-wise multiplication (each element multiplies its equivalent, index 0->0, 1->1, 2->2)
tensor * tensor, torch.mul(tensor, tensor)
(tensor([1, 4, 9]), tensor([1, 4, 9]))

5.2 matrix multiplication (is all you need)

import torch

tensor = torch.tensor([1, 2, 3])
tensor.shape
torch.Size([3])
tensor * tensor, torch.mul(tensor, tensor)
(tensor([1, 4, 9]), tensor([1, 4, 9]))
tensor @ tensor, torch.matmul(tensor, tensor)
# torch.mm(tensor, tensor) this will error, as the tensors are not matrices
(tensor(14), tensor(14))
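
For 1-D tensors, matmul is just the dot product: multiply element-wise, then sum. A minimal sketch of that equivalence:

manual = (tensor * tensor).sum()  # 1*1 + 2*2 + 3*3 = 14
manual, torch.matmul(tensor, tensor)  # (tensor(14), tensor(14))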

5.3 common error, shape mismatch

# Shapes need to be the right way around (inner dimensions must match)
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11],  #
                         [9, 12]], dtype=torch.float32)

torch.matmul(tensor_A, tensor_B)  # (this will error)
---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

Cell In[32], line 10
      2 tensor_A = torch.tensor([[1, 2],
      3                          [3, 4],
      4                          [5, 6]], dtype=torch.float32)
      6 tensor_B = torch.tensor([[7, 10],
      7                          [8, 11],  #
      8                          [9, 12]], dtype=torch.float32)
---> 10 torch.matmul(tensor_A, tensor_B)


RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)
print(tensor_A)
print(tensor_B)
print(tensor_A)
print(tensor_B.T)
# The operation works when tensor_B is transposed
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
print("Output:\n")
output = torch.matmul(tensor_A, tensor_B.T)
print(output)
print(f"\nOutput shape: {output.shape}")
tensor_A @ tensor_B.T, torch.matmul(tensor_A, tensor_B.T), torch.mm(tensor_A, tensor_B.T)
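
The rule of thumb: the inner dimensions must match, and the result takes on the outer dimensions. A quick sketch checking this with the tensors above:

# (3, 2) @ (2, 3) -> (3, 3): inner dims (2 and 2) match, outer dims (3 and 3) shape the result
assert tensor_A.shape == torch.Size([3, 2]) and tensor_B.T.shape == torch.Size([2, 3])
torch.matmul(tensor_A, tensor_B.T).shape  # torch.Size([3, 3])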

5.4 linear layer

torch.manual_seed(42)
linear = torch.nn.Linear(in_features=2, out_features=6)
x = tensor_A
output = linear(x)
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")
Input shape: torch.Size([3, 2])

Output:
tensor([[2.2368, 1.2292, 0.4714, 0.3864, 0.1309, 0.9838],
        [4.4919, 2.1970, 0.4469, 0.5285, 0.3401, 2.4777],
        [6.7469, 3.1648, 0.4224, 0.6705, 0.5493, 3.9716]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 6])
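
Under the hood, nn.Linear computes y = x @ W.T + b, where the weight matrix W has shape (out_features, in_features) and the bias b has shape (out_features,). A minimal sketch reproducing the output above by hand:

manual_output = x @ linear.weight.T + linear.bias  # same computation as linear(x)
torch.allclose(manual_output, output)  # True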

6 aggregation: sum, mean, max, min, etc

x = torch.arange(0, 100, 10)
x, x.dtype
(tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]), torch.int64)
print(f"Minimum: {x.min()}")
print(f"Maximum: {x.max()}")
# print(f"Mean: {x.mean()}") # this will error
print(f"Mean: {x.type(torch.float32).mean()}")  # won't work without float datatype
print(f"Sum: {x.sum()}")
Minimum: 0
Maximum: 90
Mean: 45.0
Sum: 450
torch.max(x), torch.min(x), torch.mean(x.type(torch.float32)), torch.sum(x)
(tensor(90), tensor(0), tensor(45.), tensor(450))
# Create a tensor
tensor = torch.arange(10, 100, 10)
print(f"Tensor: {tensor}")

# Returns index of max and min values
print(f"Index where max value occurs: {tensor.argmax()}")
print(f"Index where min value occurs: {tensor.argmin()}")
Tensor: tensor([10, 20, 30, 40, 50, 60, 70, 80, 90])
Index where max value occurs: 8
Index where min value occurs: 0
torch.argmax(tensor), torch.argmin(tensor)
(tensor(8), tensor(0))
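
For multi-dimensional tensors, torch.max() and torch.min() also take a dim argument and then return both the values and their indices; a short sketch:

m = torch.tensor([[1, 9, 3],
                  [7, 2, 8]])
values, indices = torch.max(m, dim=1)  # reduce along each row
values, indices  # (tensor([9, 8]), tensor([1, 2]))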

7 change data type

tensor.type(torch.float16)
tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.], dtype=torch.float16)
tensor.type(torch.int8)
tensor([10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=torch.int8)
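
Note that Tensor.type() returns a new tensor and leaves the original untouched; Tensor.to() does the same and can also move devices. A minimal sketch:

tensor.to(torch.float32)  # equivalent to tensor.type(torch.float32)
tensor.dtype  # still torch.int64, the original is unchanged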

8 reshape, stacking, squeeze, unsqueeze

Often you’ll want to reshape or change the dimensions of your tensors without changing the values inside them.

To do so, some popular methods are:

  • torch.reshape(input, shape): reshapes input to shape (if compatible); can also use torch.Tensor.reshape().
  • Tensor.view(shape): returns a view of the original tensor in a different shape but shares the same data as the original tensor.
  • torch.stack(tensors, dim=0): concatenates a sequence of tensors along a new dimension (dim); all tensors must be the same size.
  • torch.squeeze(input): squeezes input to remove all dimensions of size 1.
  • torch.unsqueeze(input, dim): returns input with a dimension of size 1 added at dim.
  • torch.permute(input, dims): returns a view of the original input with its dimensions permuted (rearranged) to dims.
# Create a tensor
import torch

x = torch.arange(1., 8.)
x, x.shape
(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))
x_reshaped = torch.reshape(x, (1, 7))
x_reshaped, x_reshaped.shape
(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

#TODO: read https://stackoverflow.com/a/54507446/7900723

x_view = x.view(1, 7)
x_view, x_view.shape
(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))
# x_view shares data with x, so changing x_view also changes x (and x_reshaped)
x_view[:, 0] = 7
x_view, x
(tensor([[7., 2., 3., 4., 5., 6., 7.]]), tensor([7., 2., 3., 4., 5., 6., 7.]))
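
If you want a reshaped copy that does not alias the original data, chain .clone(); a minimal sketch:

x_copy = x.view(1, 7).clone()  # clone() materializes independent storage
x_copy[:, 0] = 99.
x_copy[:, 0], x[0]  # (tensor([99.]), tensor(7.)), only the copy changed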

If we wanted to stack our new tensor on top of itself five times, we could do so with torch.stack().

x_stacked = torch.stack([x, x, x, x, x], dim=0)
x_stacked, x_stacked.shape
(tensor([[7., 2., 3., 4., 5., 6., 7.],
         [7., 2., 3., 4., 5., 6., 7.],
         [7., 2., 3., 4., 5., 6., 7.],
         [7., 2., 3., 4., 5., 6., 7.],
         [7., 2., 3., 4., 5., 6., 7.]]),
 torch.Size([5, 7]))
x_stacked = torch.stack([x, x, x, x, x], dim=1)
x_stacked, x_stacked.shape
(tensor([[7., 7., 7., 7., 7.],
         [2., 2., 2., 2., 2.],
         [3., 3., 3., 3., 3.],
         [4., 4., 4., 4., 4.],
         [5., 5., 5., 5., 5.],
         [6., 6., 6., 6., 6.],
         [7., 7., 7., 7., 7.]]),
 torch.Size([7, 5]))
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# Remove extra dimension from x_reshaped
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")
Previous tensor: tensor([[7., 2., 3., 4., 5., 6., 7.]])
Previous shape: torch.Size([1, 7])

New tensor: tensor([7., 2., 3., 4., 5., 6., 7.])
New shape: torch.Size([7])
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

# Add an extra dimension with unsqueeze
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")
Previous tensor: tensor([7., 2., 3., 4., 5., 6., 7.])
Previous shape: torch.Size([7])

New tensor: tensor([[7., 2., 3., 4., 5., 6., 7.]])
New shape: torch.Size([1, 7])

You can also rearrange the order of axes values with torch.permute(input, dims), where the input gets turned into a view with new dims.

Note: Because permuting returns a view (shares the same data as the original), the values in the permuted tensor will be the same as the original tensor and if you change the values in the view, it will change the values of the original.

# Create tensor with specific shape
x_original = torch.rand(size=(224, 224, 3))

# Permute the original tensor to rearrange the axis order
x_permuted = x_original.permute(2, 0, 1)  # shifts axis 0->1, 1->2, 2->0

print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}")
Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])
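
Because x_permuted is a view of x_original, writing through one shows up in the other; a quick sketch to verify:

x_original[0, 0, 0] = 728218
x_permuted[0, 0, 0]  # tensor(728218.), the view sees the change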

9 indexing: select data from tensor

# Create a tensor 
import torch

x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape
(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))
# Let's index bracket by bracket
print(f"First square bracket:\n{x[0]}")
print(f"Second square bracket: {x[0][0]}")
print(f"Third square bracket: {x[0][0][0]}")
First square bracket:
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
Second square bracket: tensor([1, 2, 3])
Third square bracket: 1
x[0], x[0, 0], x[0, 0, 0]
(tensor([[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]),
 tensor([1, 2, 3]),
 tensor(1))

You can also use : to specify “all values in this dimension” and then use a comma (,) to add another dimension.

# Get all values of 0th dimension and the 0 index of 1st dimension
x[:, 0]
tensor([[1, 2, 3]])
# Get all values of 0th & 1st dimensions but only index 1 of 2nd dimension
x[:, :, 1]
tensor([[2, 5, 8]])
# Get all values of the 0 dimension but only the 1 index value of the 1st and 2nd dimension
x[:, 1, 1]
tensor([5])
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension 
x[0, 0, :]  # same as x[0][0]
tensor([1, 2, 3])
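
Beyond integers and slices, indexing also accepts boolean masks and integer index tensors; a minimal sketch of both:

x[x > 5]  # boolean mask: tensor([6, 7, 8, 9])
x[0, [0, 2], [0, 2]]  # integer index tensors: picks (0, 0) and (2, 2), tensor([1, 9])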

10 PyTorch tensors & NumPy

Since NumPy is a popular Python numerical computing library, PyTorch has functionality to interact with it nicely.

The two main methods you’ll want to use for NumPy to PyTorch (and back again) are:

  • torch.from_numpy(ndarray): NumPy array -> PyTorch tensor.
  • torch.Tensor.numpy(): PyTorch tensor -> NumPy array.

# NumPy array to tensor
import torch
import numpy as np
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array, tensor
(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

By default, NumPy arrays are created with the datatype float64, and if you convert one to a PyTorch tensor, it’ll keep that datatype (as above).

However, many PyTorch calculations default to using float32.

So if you want to convert your NumPy array (float64) -> PyTorch tensor (float64) -> PyTorch tensor (float32), you can use tensor = torch.from_numpy(array).type(torch.float32).

tensor = torch.from_numpy(array).type(torch.float32)
# Tensor to NumPy array
tensor = torch.ones(7) # create a tensor of ones with dtype=float32
numpy_tensor = tensor.numpy() # will be dtype=float32 unless changed
tensor, numpy_tensor
(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))
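
One caveat: on CPU, torch.from_numpy() and Tensor.numpy() share memory with the underlying array, so in-place changes to one show up in the other. A short sketch:

numpy_tensor += 1  # in-place change to the NumPy array...
tensor  # tensor([2., 2., 2., 2., 2., 2., 2.]), ...is visible through the tensor too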

11 Reproducibility: trying to take the random out of random

#TODO: The PyTorch reproducibility documentation

#TODO: The Wikipedia random seed page

import torch

# Create two random tensors
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print(f"Does Tensor A equal Tensor B? (anywhere)")
random_tensor_A == random_tensor_B
Tensor A:
tensor([[0.8016, 0.3649, 0.6286, 0.9663],
        [0.7687, 0.4566, 0.5745, 0.9200],
        [0.3230, 0.8613, 0.0919, 0.3102]])

Tensor B:
tensor([[0.9536, 0.6002, 0.0351, 0.6826],
        [0.3743, 0.5220, 0.1336, 0.9666],
        [0.9754, 0.8474, 0.8988, 0.1105]])

Does Tensor A equal Tensor B? (anywhere)

tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])
import torch

# Set the random seed
RANDOM_SEED = 42  # try changing this to different values and see what happens to the numbers below
torch.manual_seed(seed=RANDOM_SEED) 
random_tensor_C = torch.rand(3, 4)

# Have to reset the seed every time a new rand() is called 
# Without this, tensor_D would be different to tensor_C 
torch.random.manual_seed(seed=RANDOM_SEED) # try commenting this line out and seeing what happens
random_tensor_D = torch.rand(3, 4)

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print(f"Does Tensor C equal Tensor D? (anywhere)")
random_tensor_C == random_tensor_D
Tensor C:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Tensor D:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Does Tensor C equal Tensor D? (anywhere)

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])
import torch

# Set the random seed
RANDOM_SEED = 42  # try changing this to different values and see what happens to the numbers below
torch.manual_seed(seed=RANDOM_SEED) 
random_tensor_C = torch.rand(3, 4)

# Have to reset the seed every time a new rand() is called 
# Without this, tensor_D would be different to tensor_C 
# torch.random.manual_seed(seed=RANDOM_SEED) # try commenting this line out and seeing what happens
random_tensor_D = torch.rand(3, 4)

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print(f"Does Tensor C equal Tensor D? (anywhere)")
random_tensor_C == random_tensor_D
Tensor C:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Tensor D:
tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]])

Does Tensor C equal Tensor D? (anywhere)

tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

12 use gpu

if torch.cuda.is_available():
    device = "cuda" # Use NVIDIA GPU (if available)
    print(torch.cuda.device_count())
elif torch.backends.mps.is_available():
    device = "mps" # Use Apple Silicon GPU (if available)
    print(torch.mps.device_count())
else:
    device = "cpu" # Default to CPU if no GPU is available
    print(torch.cpu.device_count())
device
1

'mps'
# Create tensor (default on CPU)
tensor = torch.tensor([1, 2, 3])

# Tensor not on GPU
print(tensor, tensor.device)

# Move tensor to GPU (if available)
tensor_on_gpu = tensor.to(device)
tensor_on_gpu
tensor([1, 2, 3]) cpu

tensor([1, 2, 3], device='mps:0')
# If tensor is on GPU, can't transform it to NumPy (this will error)
tensor_on_gpu.numpy()
---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

Cell In[65], line 2
      1 # If tensor is on GPU, can't transform it to NumPy (this will error)
----> 2 tensor_on_gpu.numpy()


TypeError: can't convert mps:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
# Instead, copy the tensor back to cpu
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu
array([1, 2, 3])
tensor_on_gpu
tensor([1, 2, 3], device='mps:0')
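
Rather than creating a tensor on the CPU and moving it, you can also allocate directly on the target device; a minimal sketch reusing the device variable from above:

tensor_direct = torch.rand(3, 4, device=device)  # allocated straight onto 'mps' here
tensor_direct.device  # device(type='mps', index=0)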