enable more test cases on xpu (#38572)
* enable glm4 integration cases on XPU, set xpu expectation for blip2

Signed-off-by: Matrix YAO <matrix.yao@intel.com>

* more

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix style

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* refine wording

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* refine test case names

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* run

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* add gemma2 and chameleon

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix review comments

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

---------

Signed-off-by: Matrix YAO <matrix.yao@intel.com>
Signed-off-by: YAO Matrix <matrix.yao@intel.com>
This commit is contained in:
@@ -11,17 +11,18 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import gc
|
||||
import unittest
|
||||
|
||||
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, QuarkConfig
|
||||
from transformers.testing_utils import (
|
||||
cleanup,
|
||||
is_torch_available,
|
||||
require_accelerate,
|
||||
require_quark,
|
||||
require_torch_gpu,
|
||||
require_torch_multi_gpu,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils.import_utils import is_quark_available
|
||||
|
||||
@@ -79,11 +80,10 @@ class QuarkTest(unittest.TestCase):
|
||||
|
||||
def tearDown(self):
|
||||
r"""
|
||||
TearDown function needs to be called at the end of each test to free the GPU memory and cache, also to
|
||||
TearDown function needs to be called at the end of each test to free the accelerator memory and cache, also to
|
||||
avoid unexpected behaviors. Please see: https://discuss.pytorch.org/t/how-can-we-release-gpu-memory-cache/14530/27
|
||||
"""
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
cleanup(torch_device, gc_collect=True)
|
||||
|
||||
def test_memory_footprint(self):
|
||||
mem_quantized = self.quantized_model.get_memory_footprint()
|
||||
|
||||
Reference in New Issue
Block a user