From f105ba015078d22332b2d5b6e6f14419cbb70840 Mon Sep 17 00:00:00 2001
From: ZhuangYumin <zhuangyumin@sjtu.edu.cn>
Date: Sat, 6 Jul 2024 01:24:14 +0800
Subject: [PATCH] ml q5: implement Convolve and DigitConvolutionalModel with optional CUDA

---
 machinelearning/autograder.py |  2 +-
 machinelearning/backend.py    |  2 +-
 machinelearning/models.py     | 75 ++++++++++++++++++++++++++++++-----
 3 files changed, 68 insertions(+), 11 deletions(-)

diff --git a/machinelearning/autograder.py b/machinelearning/autograder.py
index a307729..2c31793 100644
--- a/machinelearning/autograder.py
+++ b/machinelearning/autograder.py
@@ -576,7 +576,7 @@ def check_convolution(tracker):
         input = torch.rand(matrix_size, matrix_size)
         student_output = models.Convolve(input, weights)
         actual_output = conv2d(input,weights)
-        assert torch.isclose(student_output, actual_output).all(), "The convolution returned by Convolve() does not match expected output"
+        assert torch.isclose(student_output.cpu(), actual_output).all(), "The convolution returned by Convolve() does not match expected output"
 
     tracker.add_points(1/2) # Partial credit for testing whether convolution function works
 
diff --git a/machinelearning/backend.py b/machinelearning/backend.py
index 0cb824b..77008ad 100644
--- a/machinelearning/backend.py
+++ b/machinelearning/backend.py
@@ -492,7 +492,7 @@ class DigitClassificationDataset2(CustomDataset):
     def get_validation_accuracy(self):
         dev_logits = self.model.run(torch.tensor(self.dev_images, dtype=torch.float32)).data
         dev_predicted = torch.argmax(dev_logits, axis=1).detach()
-        dev_accuracy = torch.mean(torch.eq(dev_predicted, torch.tensor(self.dev_labels)).float())
+        dev_accuracy = torch.mean(torch.eq(dev_predicted.cpu(), torch.tensor(self.dev_labels)).float())
         return dev_accuracy
 
 def main():
diff --git a/machinelearning/models.py b/machinelearning/models.py
index 8607d85..63d4cff 100644
--- a/machinelearning/models.py
+++ b/machinelearning/models.py
@@ -457,15 +457,28 @@ def Convolve(input: tensor, weight: tensor):
 
     This returns a subtensor who's first element is tensor[y,x] and has height 'height, and width 'width'
     """
-    input_tensor_dimensions = input.shape
-    weight_dimensions = weight.shape
-    Output_Tensor = tensor(())
-    "*** YOUR CODE HERE ***"
-
+    input_tensor_height, input_tensor_width = input.shape
+    weight_height, weight_width = weight.shape
     
-    "*** End Code ***"
-    return Output_Tensor
+    # Calculate output dimensions
+    output_height = input_tensor_height - weight_height + 1
+    output_width = input_tensor_width - weight_width + 1
 
+    # Move operands to the shared device only when they sit on another device type.
+    if input.device.type != Convolve.device.type:
+        input = input.to(Convolve.device)
+    if weight.device.type != Convolve.device.type:
+        weight = weight.to(Convolve.device)
+
+    # unfold exposes every weight-sized window of the input as a view of shape
+    # (output_height, output_width, weight_height, weight_width); a single
+    # tensordot over the two window axes then yields the whole output at once,
+    # replacing one tensordot call and one in-place write per output cell.
+    windows = input.unfold(0, weight_height, 1).unfold(1, weight_width, 1)
+    output = torch.tensordot(windows, weight, dims=2)
+    return output
+
+Convolve.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 class DigitConvolutionalModel(Module):
@@ -484,9 +497,17 @@ class DigitConvolutionalModel(Module):
         # Initialize your model parameters here
         super().__init__()
         output_size = 10
-
-        self.convolution_weights = Parameter(ones((3, 3)))
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.convolution_weights = Parameter(ones((3, 3), device=self.device))  # build on-device: Parameter(...).to(dev) returns a plain Tensor that is not registered
         """ YOUR CODE HERE """
+        flatten_size = 26 * 26
+        linear1_size = 300
+        linear2_size = 300
+        linear3_size = 300
+        self.fc1=Linear(flatten_size, linear1_size).to(self.device)
+        self.fc_out = Linear(linear1_size, output_size).to(self.device)
+        # self.fc2 = Linear(linear1_size, linear2_size).to(self.device)
+        # self.fc3 = Linear(linear2_size, output_size).to(self.device)
 
 
     def run(self, x):
@@ -494,10 +515,18 @@ class DigitConvolutionalModel(Module):
         The convolutional layer is already applied, and the output is flattened for you. You should treat x as
         a regular 1-dimentional datapoint now, similar to the previous questions.
         """
+        # print(f"now x={x}")
         x = x.reshape(len(x), 28, 28)
         x = stack(list(map(lambda sample: Convolve(sample, self.convolution_weights), x)))
+        # print(f"now x={x}")
         x = x.flatten(start_dim=1)
         """ YOUR CODE HERE """
+        # x=x.to(self.device)
+        x=torch.relu(self.fc1(x))
+        x = self.fc_out(x)
+        # x = torch.relu(self.fc2(x))
+        # x = torch.relu(self.fc3(x))
+        return x
 
  
 
@@ -515,6 +544,13 @@ class DigitConvolutionalModel(Module):
         Returns: a loss tensor
         """
         """ YOUR CODE HERE """
+        # print(f"x={x},y={y}")
+        # print(f"self.run(x.to(self.device))={self.run(x.to(self.device))}")
+        if x.device.type!=self.device.type:
+            x=x.to(self.device)
+        if y.device.type!=self.device.type:
+            y=y.to(self.device)
+        return cross_entropy(self.run(x), y)
 
         
 
@@ -523,4 +559,25 @@ class DigitConvolutionalModel(Module):
         Trains the model.
         """
         """ YOUR CODE HERE """
+        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
+        dataloader = DataLoader(dataset, batch_size=10, shuffle=True)
+        max_round = 30000          # upper bound on optimizer steps
+        required_accuracy = 0.99   # stop once validation accuracy exceeds this
+        round_cnt = 0
+        accuracy = 0.0
+        while round_cnt < max_round and accuracy <= required_accuracy:
+            for sample in dataloader:
+                x = sample['x'].to(self.device)
+                y = sample['label'].to(self.device)
+                optimizer.zero_grad()
+                self.get_loss(x, y).backward()
+                optimizer.step()
+                round_cnt += 1
+                # Validate once per step and reuse the value for logging and the
+                # stop test; no_grad skips autograd bookkeeping for the dev set.
+                with torch.no_grad():
+                    accuracy = dataset.get_validation_accuracy()
+                print(f"round: {round_cnt}, accuracy: {accuracy}")
+                if accuracy > required_accuracy or round_cnt >= max_round:
+                    break
  
\ No newline at end of file