diff --git a/contrib/Python/cntk/ops/tests/linear_test.py b/contrib/Python/cntk/ops/tests/linear_test.py
index 4af9d4e0a..c23d5e3e0 100644
--- a/contrib/Python/cntk/ops/tests/linear_test.py
+++ b/contrib/Python/cntk/ops/tests/linear_test.py
@@ -12,7 +12,7 @@ the forward and the backward pass
 from __future__ import division
 import numpy as np
 import pytest
-from .ops_test_utils import unittest_helper, AA, I, precision, PRECISION_TO_TYPE
+from .ops_test_utils import unittest_helper, AA, I, precision, PRECISION_TO_TYPE, batch_dense_to_sparse, left_matrix_type, right_matrix_type
 
 from ...graph import *
 from .. import *
@@ -47,7 +47,7 @@ def test_op_plus(left_operand, right_operand, device_id, precision):
     # we need two surrounding brackets
     # the first for sequences (length=1, since we have dynamic_axis='')
     # the second for batch of one sample
-    expected = [[AA(left_operand) + AA(right_operand)]]
+    expected = [[AA(left_operand, dtype=PRECISION_TO_TYPE[precision]) + AA(right_operand, dtype=PRECISION_TO_TYPE[precision])]]
 
     a = I([left_operand])
     b = I([right_operand])
@@ -86,7 +86,7 @@ def test_op_minus(left_operand, right_operand, device_id, precision):
     # we need two surrounding brackets
     # the first for sequences (length=1, since we have dynamic_axis='')
     # the second for batch of one sample
-    expected = [[AA(left_operand) - AA(right_operand)]]
+    expected = [[AA(left_operand, dtype=PRECISION_TO_TYPE[precision]) - AA(right_operand, dtype=PRECISION_TO_TYPE[precision])]]
 
     a = I([left_operand])
     b = I([right_operand])
@@ -124,7 +124,7 @@ def test_op_element_times(left_operand, right_operand, device_id, precision):
     # we need two surrounding brackets
     # the first for sequences (length=1, since we have dynamic_axis='')
     # the second for batch of one sample
-    expected = [[AA(left_operand) * AA(right_operand)]]
+    expected = [[AA(left_operand, dtype=PRECISION_TO_TYPE[precision]) * AA(right_operand, dtype=PRECISION_TO_TYPE[precision])]]
 
     a = I([left_operand])
     b = I([right_operand])
@@ -163,7 +163,7 @@ def test_op_element_divide(left_operand, right_operand, device_id, precision):
     # we need two surrounding brackets
     # the first for sequences (length=1, since we have dynamic_axis='')
     # the second for batch of one sample
-    expected = [[AA(left_operand) / AA(right_operand)]]
+    expected = [[AA(left_operand, dtype=PRECISION_TO_TYPE[precision]) / AA(right_operand, dtype=PRECISION_TO_TYPE[precision])]]
 
     a = I([left_operand])
     b = I([right_operand])
@@ -184,14 +184,14 @@ def test_op_element_divide(left_operand, right_operand, device_id, precision):
     # For left: d/da (a/b) = 1/b
     # For right: d/db (a/b) = a * d/db (1/b) = a * -1/b^2 = -a/b^2
     expected_left = [[[np.ones_like(x) / x for x in right_operand]]]
-    expected_right = [[-AA(left_operand) / AA(right_operand)**2]]
+    expected_right = [[-AA(left_operand, dtype=PRECISION_TO_TYPE[precision]) / AA(right_operand, dtype=PRECISION_TO_TYPE[precision])**2]]
 
     unittest_helper(left_as_input, None, expected_left, device_id=device_id,
                     precision=precision, clean_up=True, backward_pass=True, input_node=a)
     unittest_helper(right_as_input, None, expected_right, device_id=device_id,
                     precision=precision, clean_up=True, backward_pass=True, input_node=b)
 
-TENSORS = [
+IDENTITY_TENSORS = [
     ([30.]),
     ([[30.]]),
     ([[1.5, 2.1]]),
@@ -201,7 +201,7 @@ TENSORS = [
 
 # -- identity function tests --
 
-@pytest.mark.parametrize("tensor", TENSORS)
+@pytest.mark.parametrize("tensor", IDENTITY_TENSORS)
 def test_op_identity(tensor, device_id, precision):
 
     def numpy_op(x):
@@ -232,3 +232,82 @@ def test_op_identity(tensor, device_id, precision):
     unittest_helper(op_node, None, expected, device_id=device_id,
                     precision=precision, clean_up=True, backward_pass=True,
                     input_node=input_node)
+
+
+TIMES_PAIRS = [
+    ([[30.]], [[10.]]),
+    ([[1.5, 2.1]], [[10.], [20.]]),
+    ([[100., 200.], [300., 400.]], [[10.], [20.]]),
+]
+
+@pytest.mark.parametrize("left_operand, right_operand", TIMES_PAIRS)
+def test_op_times(left_operand, right_operand, device_id, precision,
+        left_matrix_type, right_matrix_type):
+    if left_matrix_type == 'sparse':
+        pytest.skip('first operator of times() has to be dense')
+
+    dt = PRECISION_TO_TYPE[precision]
+    # Forward pass test
+    #==================
+    # we compute the expected output for the forward pass
+    # we need two surrounding brackets
+    # the first for sequences (length=1, since we have dynamic_axis='')
+    # the second for batch of one sample
+    expected = [[np.dot(AA(left_operand, dtype=dt), AA(right_operand, dtype=dt))]]
+
+    a = I([left_operand])
+
+    if right_matrix_type == 'sparse':
+        b = SI(*batch_dense_to_sparse([right_operand]))
+    else:
+        b = I([right_operand])
+
+    from cntk.ops import times, constant
+    left_as_input = times(a, constant(right_operand))
+    right_as_input = times(constant(left_operand), b)
+
+    unittest_helper(left_as_input, None, expected, device_id=device_id,
+                    precision=precision, clean_up=True, backward_pass=False)
+
+    unittest_helper(right_as_input, None, expected, device_id=device_id,
+                    precision=precision, clean_up=True, backward_pass=False)
+
+    unittest_helper(times(a, b), None, expected, device_id=device_id,
+                    precision=precision, clean_up=True, backward_pass=False)
+
+
+    # Backward pass test
+    #==================
+
+    def op_grad(A, B):
+        '''
+        Compute derivative of A with respect to B. For simplicity, assume A
+        and B to be matrices.
+        Let A be 2x2 and B be 2x1, then we have
+        [a11 a12] [b11]  = [ a11 b11 + a12 b21 ]
+        [a21 a22] [b21]    [ a21 b11 + a22 b21 ]
+
+        The derivative for A with respect to B is
+        [b11 b21]
+        [b11 b21]
+
+        The derivative for B with respect to A:
+        [a11 + a12]
+        [a21 + a22]
+        '''
+        assert len(A.shape) == len(B.shape) == 2
+        D = np.zeros_like(A)
+        D[:,:] = B.sum(axis=1)
+        
+        return D
+
+    if 'sparse' not in [left_matrix_type, right_matrix_type]:
+        # FIXME: disabling until the Pass node supports sparse 
+        expected_left = [[op_grad(AA(left_operand, dtype=dt), AA(right_operand, dtype=dt))]]
+        expected_right = [[op_grad(AA(right_operand, dtype=dt).T, AA(left_operand, dtype=dt).T).T]]
+
+        unittest_helper(left_as_input, None, expected_left, device_id=device_id,
+                        precision=precision, clean_up=True, backward_pass=True, input_node=a)
+        # BUG: Fails because of Pass node?
+        unittest_helper(right_as_input, None, expected_right, device_id=device_id,
+                        precision=precision, clean_up=True, backward_pass=True, input_node=b)