From 9fe5ab364280325f77da15f3541960960961d144 Mon Sep 17 00:00:00 2001
From: John Smith
Date: Sat, 22 Apr 2023 17:23:24 +0800
Subject: [PATCH] fix bug

---
 model_attn_mlp_patch.py                 | 35 +++++++++++++------
 .../gptq_for_llala_lora_monkey_patch.py | 35 +++++++++++++------
 2 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/model_attn_mlp_patch.py b/model_attn_mlp_patch.py
index d2b0bc6..1b20465 100644
--- a/model_attn_mlp_patch.py
+++ b/model_attn_mlp_patch.py
@@ -209,6 +209,26 @@ class CustomLoraLayerMerged(torch.nn.Module):
         return q, v
 
 
+class LoraInjectionWrapper:
+
+    def __init__(self, module, lora_layer):
+        self.module = module
+        self.lora_layer = lora_layer
+
+    def apply(self):
+        self.module.forward_before_lora = self.module.forward
+        self.module.forward = self.forward_with_lora
+        self.module.is_lora_injected = True
+
+    def forward_with_lora(self, x):
+        result = self.module.forward_before_lora(x)
+        q, v = self.lora_layer(x)
+        dim = self.module.out_features // 3
+        result[:, :, :dim] += q
+        result[:, :, -dim:] += v
+        return result
+
+
 def inject_lora_layers(model, lora_path, device='cuda', dtype=torch.float16):
 
     print('Device: {}, dtype: {}'.format(device, dtype))
@@ -263,6 +283,7 @@ def inject_lora_layers(model, lora_path, device='cuda', dtype=torch.float16):
         lora_layers[prefix] = lora_layer
 
     # Injection
+    wrappers = []
     for n, m in model.named_modules():
         if 'qkv_proj' in n and isinstance(m, Autograd4bitQuantLinear):
             # restoring forward
@@ -270,16 +291,10 @@ def inject_lora_layers(model, lora_path, device='cuda', dtype=torch.float16):
                 m.forward = m.forward_before_lora
             prefix = re.findall('^model\.layers\.\d+\.', n)[0]
             lora_layer = lora_layers[prefix]
-            m.forward_before_lora = m.forward
-            def forward_with_lora(self, x):
-                result = self.forward_before_lora(x)
-                q, v = lora_layer(x)
-                dim = self.out_features // 3
-                result[:, :, :dim] += q
-                result[:, :, -dim:] += v
-                return result
-            m.forward = types.MethodType(forward_with_lora, m)
-            m.is_lora_injected = True
+            wrapper = LoraInjectionWrapper(m, lora_layer)
+            wrapper.apply()
+            wrappers.append(wrapper)
 
 
     print('Lora Injected.')
+    return wrappers
\ No newline at end of file
diff --git a/monkeypatch/gptq_for_llala_lora_monkey_patch.py b/monkeypatch/gptq_for_llala_lora_monkey_patch.py
index 29802c4..3f9dbdd 100644
--- a/monkeypatch/gptq_for_llala_lora_monkey_patch.py
+++ b/monkeypatch/gptq_for_llala_lora_monkey_patch.py
@@ -21,6 +21,26 @@ class CustomLoraLayerMerged(torch.nn.Module):
         return q, v
 
 
+class LoraInjectionWrapper:
+
+    def __init__(self, module, lora_layer):
+        self.module = module
+        self.lora_layer = lora_layer
+
+    def apply(self):
+        self.module.forward_before_lora = self.module.forward
+        self.module.forward = self.forward_with_lora
+        self.module.is_lora_injected = True
+
+    def forward_with_lora(self, x):
+        result = self.module.forward_before_lora(x)
+        q, v = self.lora_layer(x)
+        dim = self.module.outfeatures // 3
+        result[:, :, :dim] += q
+        result[:, :, -dim:] += v
+        return result
+
+
 def inject_lora_layers(model, lora_path, device='cuda', dtype=torch.float16):
 
     print('Device: {}, dtype: {}'.format(device, dtype))
@@ -75,6 +95,7 @@ def inject_lora_layers(model, lora_path, device='cuda', dtype=torch.float16):
         lora_layers[prefix] = lora_layer
 
     # Injection
+    wrappers = []
     for n, m in model.named_modules():
         if 'qkv_proj' in n and isinstance(m, QuantLinear):
             # restoring forward
@@ -82,15 +103,9 @@ def inject_lora_layers(model, lora_path, device='cuda', dtype=torch.float16):
                 m.forward = m.forward_before_lora
             prefix = re.findall('^model\.layers\.\d+\.', n)[0]
             lora_layer = lora_layers[prefix]
-            m.forward_before_lora = m.forward
-            def forward_with_lora(self, x):
-                result = self.forward_before_lora(x)
-                q, v = lora_layer(x)
-                dim = self.outfeatures // 3
-                result[:, :, :dim] += q
-                result[:, :, -dim:] += v
-                return result
-            m.forward = types.MethodType(forward_with_lora, m)
-            m.is_lora_injected = True
+            wrapper = LoraInjectionWrapper(m, lora_layer)
+            wrapper.apply()
+            wrappers.append(wrapper)
 
     print('Lora Injected.')
+    return wrappers
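
Usage note (not part of the patch): after this change, inject_lora_layers returns the list of LoraInjectionWrapper objects it created, so the caller keeps a handle on every patched qkv_proj module. A minimal sketch of how that return value might be used, assuming `model` is an already-loaded 4-bit LLaMA model and 'path/to/lora.bin' is a placeholder checkpoint path:

    # placeholder path; device='cuda' and dtype=torch.float16 are the defaults
    wrappers = inject_lora_layers(model, 'path/to/lora.bin')

    # Each wrapper keeps a reference to its module, so the monkey patch can be reverted later.
    for w in wrappers:
        if getattr(w.module, 'is_lora_injected', False):
            w.module.forward = w.module.forward_before_lora
            w.module.is_lora_injected = False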