From 09c85f8942bd9db45aa2fdd302cc8bcc910af812 Mon Sep 17 00:00:00 2001
From: ZhuangYumin <zhuangyumin@sjtu.edu.cn>
Date: Sat, 6 Jul 2024 01:48:52 +0800
Subject: [PATCH] accelerate ml q5 using GPU

---
 machinelearning/models.py | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/machinelearning/models.py b/machinelearning/models.py
index 63d4cff..cd4431d 100644
--- a/machinelearning/models.py
+++ b/machinelearning/models.py
@@ -457,27 +457,33 @@ def Convolve(input: tensor, weight: tensor):
 
     This returns a subtensor who's first element is tensor[y,x] and has height 'height, and width 'width'
     """
-    input_tensor_height, input_tensor_width = input.shape
-    weight_height, weight_width = weight.shape
-    
-    # Calculate output dimensions
-    output_height = input_tensor_height - weight_height + 1
-    output_width = input_tensor_width - weight_width + 1
-
     # Initialize output tensor
     if input.device.type!=Convolve.device.type:
         input=input.to(Convolve.device)
     if weight.device.type!=Convolve.device.type:
         weight=weight.to(Convolve.device)
-    output = torch.zeros((output_height, output_width),device=Convolve.device)
+    input_4d = input.unsqueeze(0).unsqueeze(0)  # Make it shape (1, 1, H, W)
+    weight_4d = weight.unsqueeze(0).unsqueeze(0)  # Make it shape (1, 1, kH, kW)
 
     # Perform convolution
-    for i in range(output_height):
-        for j in range(output_width):
-            output[i, j] = torch.tensordot(input[i:i+weight_height, j:j+weight_width], weight, dims=2)
+    output_4d = torch.nn.functional.conv2d(input_4d, weight_4d)
 
+    # Remove the extra dimensions
+    output = output_4d.squeeze(0).squeeze(0)
+
+    # input_tensor_height, input_tensor_width = input.shape
+    # weight_height, weight_width = weight.shape
+    
+    # # Calculate output dimensions
+    # output_height = input_tensor_height - weight_height + 1
+    # output_width = input_tensor_width - weight_width + 1
+    # output = torch.zeros((output_height, output_width),device=Convolve.device)
+
+    # # Perform convolution
+    # for i in range(output_height):
+    #     for j in range(output_width):
+    #         output[i, j] = torch.tensordot(input[i:i+weight_height, j:j+weight_width], weight, dims=2)
     return output
-
 Convolve.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")