...
 
Commits (6)
......@@ -29,6 +29,17 @@ cc_library(
name = "gl_delegate",
srcs = ["gl_delegate.cc"],
hdrs = ["gl_delegate.h"],
linkopts = select({
"//tensorflow:android": [
"-lEGL",
# We don't need to link libGLESv3, because if it exists,
# it is a symlink to libGLESv2.
# See Compatibility Definition Document:
# https://source.android.com/compatibility/10/android-10-cdd#7_1_4_1_opengl_es
"-lGLESv2",
],
"//conditions:default": [],
}),
deps = [
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/types:span",
......@@ -109,6 +120,11 @@ cc_binary(
linkopts = [
"-Wl,-soname=libtensorflowlite_gpu_gl.so",
] + select({
"//tensorflow:android": [
"-lEGL",
"-lGLESv3",
"-fvisibility=hidden",
],
"//tensorflow:windows": [],
"//conditions:default": [
"-fvisibility=hidden",
......@@ -120,7 +136,7 @@ cc_binary(
"nobuilder",
"notap",
],
deps = [":gl_delegate"] + tflite_extra_gles_deps(),
deps = [":gl_delegate"],
)
# bazel build -c opt --config android_arm64 --copt -Os --copt -DTFLITE_GPU_BINARY_RELEASE --copt -fvisibility=hidden --linkopt -s --strip always :libtensorflowlite_gpu_delegate.so
......@@ -129,6 +145,11 @@ cc_binary(
linkopts = [
"-Wl,-soname=libtensorflowlite_gpu_delegate.so",
] + select({
"//tensorflow:android": [
"-lEGL",
"-lGLESv3",
"-fvisibility=hidden",
],
"//tensorflow:windows": [],
"//conditions:default": [
"-fvisibility=hidden",
......@@ -140,7 +161,7 @@ cc_binary(
"nobuilder",
"notap",
],
deps = [":delegate"] + tflite_extra_gles_deps(),
deps = [":delegate"],
)
# bazel build -c opt --cpu ios_arm64 --copt -Os --copt -DTFLITE_GPU_BINARY_RELEASE --copt -fvisibility=hidden --linkopt -s --strip always --cxxopt=-std=c++14 :libtensorflowlite_gpu_metal --apple_platform_type=ios
......@@ -200,9 +221,18 @@ cc_library(
name = "delegate",
srcs = ["delegate.cc"],
hdrs = ["delegate.h"],
linkopts = select({
"//tensorflow:android": [
"-lEGL",
# We don't need to link libGLESv3, because if it exists,
# it is a symlink to libGLESv2.
# See Compatibility Definition Document:
# https://source.android.com/compatibility/10/android-10-cdd#7_1_4_1_opengl_es
"-lGLESv2",
],
"//conditions:default": [],
}),
deps = [
"@com_google_absl//absl/memory",
"@com_google_absl//absl/types:span",
"//tensorflow/lite:kernel_api",
"//tensorflow/lite:minimal_logging",
"//tensorflow/lite/c:common",
......@@ -216,5 +246,7 @@ cc_library(
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/delegates/gpu/gl:api2",
"//tensorflow/lite/kernels/internal:optimized_base",
] + tflite_extra_gles_deps(),
"@com_google_absl//absl/memory",
"@com_google_absl//absl/types:span",
],
)
load("@flatbuffers//:build_defs.bzl", "flatbuffer_cc_library")
load("//tensorflow/core/platform:build_config_root.bzl", "tf_gpu_tests_tags")
load("//tensorflow/lite:special_rules.bzl", "tflite_extra_gles_deps")
load(
"//tensorflow/core/platform:build_config_root.bzl",
"tf_gpu_tests_tags",
)
package(
default_visibility = ["//visibility:public"],
......@@ -265,11 +267,17 @@ cc_library(
name = "gpu_api_delegate",
srcs = ["gpu_api_delegate.cc"],
hdrs = ["gpu_api_delegate.h"],
linkopts = select({
"//tensorflow:android": [
"-lEGL",
"-lGLESv3",
],
"//conditions:default": [],
}),
deps = [
":api",
":opencl_wrapper",
":tensor_type_util",
"@com_google_absl//absl/types:span",
"//tensorflow/lite:kernel_api",
"//tensorflow/lite/c:common",
"//tensorflow/lite/delegates/gpu:api",
......@@ -279,7 +287,8 @@ cc_library(
"//tensorflow/lite/delegates/gpu/common:model_transformer",
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/delegates/gpu/common/transformations:general_transformations",
] + tflite_extra_gles_deps(),
"@com_google_absl//absl/types:span",
],
)
cc_library(
......
......@@ -35,6 +35,10 @@ cc_library(
cc_test(
name = "opengl_test",
srcs = ["opengl_test.cc"],
linkopts = [
"-lEGL",
"-lGLESv3",
],
tags = tf_gpu_tests_tags() + [
"local",
"nobuilder",
......
load("//tensorflow/lite:special_rules.bzl", "tflite_extra_gles_deps", "tflite_portable_test_suite_combined")
load("//tensorflow/core/platform:build_config_root.bzl", "tf_gpu_tests_tags")
load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite_combined")
load(
"//tensorflow/core/platform:build_config_root.bzl",
"tf_gpu_tests_tags",
)
package(
default_visibility = ["//visibility:public"],
......@@ -29,6 +32,10 @@ cc_test(
name = "converter_test",
size = "small",
srcs = ["converter_test.cc"],
linkopts = [
"-lEGL",
"-lGLESv3",
],
tags = tf_gpu_tests_tags() + [
"local",
"nobuilder",
......@@ -37,15 +44,15 @@ cc_test(
],
deps = [
":converter",
"@com_google_googletest//:gtest_main",
"@com_google_absl//absl/types:span",
"//tensorflow/lite/delegates/gpu/common:convert",
"//tensorflow/lite/delegates/gpu/common:shape",
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/delegates/gpu/gl:egl_environment",
"//tensorflow/lite/delegates/gpu/gl:gl_buffer",
"//tensorflow/lite/delegates/gpu/gl:portable",
] + tflite_extra_gles_deps(),
"@com_google_absl//absl/types:span",
"@com_google_googletest//:gtest_main",
],
)
cc_library(
......@@ -648,9 +655,11 @@ cc_library(
testonly = 1,
srcs = ["test_util.cc"],
hdrs = ["test_util.h"],
linkopts = [
"-lEGL",
"-lGLESv3",
],
deps = [
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
"//tensorflow/lite/delegates/gpu/common:model",
"//tensorflow/lite/delegates/gpu/common:operations",
"//tensorflow/lite/delegates/gpu/common:status",
......@@ -664,7 +673,9 @@ cc_library(
"//tensorflow/lite/delegates/gpu/gl:request_gpu_info",
"//tensorflow/lite/delegates/gpu/gl:runtime_options",
"//tensorflow/lite/delegates/gpu/gl/workgroups:default_calculator",
] + tflite_extra_gles_deps(),
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
],
)
cc_library(
......
......@@ -13,6 +13,13 @@ cc_library(
name = "native",
srcs = ["gpu_delegate_jni.cc"],
copts = tflite_copts(),
linkopts = select({
"//tensorflow:android": [
"-lGLESv3",
"-lEGL",
],
"//conditions:default": [],
}),
tags = [
"manual",
"notap",
......
......@@ -40,7 +40,6 @@ $(MAKEFILE_DIR)/downloads/$(AM_SDK_DEST)/$(SF_BSPS_DEST): $(MAKEFILE_DIR)/downlo
-fmessage-length=0 \
-fno-exceptions \
-fno-unwind-tables \
-fno-builtin \
-ffunction-sections \
-fdata-sections \
-funsigned-char \
......
......@@ -19,7 +19,6 @@ ifeq ($(TARGET), bluepill)
-fmessage-length=0 \
-fno-exceptions \
-fno-unwind-tables \
-fno-builtin \
-ffunction-sections \
-fdata-sections \
-funsigned-char \
......
......@@ -27,7 +27,6 @@ ifeq ($(TARGET), ecm3531)
-fmessage-length=0 \
-fno-exceptions \
-fno-unwind-tables \
-fno-builtin \
-ffunction-sections \
-fdata-sections \
-funsigned-char \
......
......@@ -40,7 +40,6 @@ ifeq ($(TARGET), hexagon)
-fdata-sections \
-ffunction-sections \
-fmessage-length=0 \
-fno-builtin \
-fno-delete-null-pointer-checks \
-fno-exceptions \
-fno-register-global-dtors-with-atexit \
......
......@@ -16,7 +16,6 @@ ifeq ($(TARGET), riscv32_mcu)
-DTF_LITE_MCU_DEBUG_LOG \
-DTF_LITE_USE_GLOBAL_CMATH_FUNCTIONS \
-fno-unwind-tables \
-fno-builtin \
-ffunction-sections \
-fdata-sections \
-funsigned-char \
......
......@@ -16,7 +16,6 @@ ifeq ($(TARGET), stm32f4)
-fmessage-length=0 \
-fno-exceptions \
-fno-unwind-tables \
-fno-builtin \
-ffunction-sections \
-fdata-sections \
-funsigned-char \
......
......@@ -1181,6 +1181,23 @@ distribute_py_test(
],
)
# Registers strategy_reduce_test.py via the distribute_py_test macro, tagged
# "multi_and_single_gpu".
distribute_py_test(
name = "strategy_reduce_test",
srcs = ["strategy_reduce_test.py"],
main = "strategy_reduce_test.py",
tags = [
"multi_and_single_gpu",
],
# NOTE(review): strategy_reduce_test.py imports reduce_util, def_function and
# constant_op, none of which are listed below, while errors and variables are
# listed but not imported by the test. Presumably the missing targets arrive
# transitively -- confirm and tighten this list.
deps = [
":combinations",
":strategy_combinations",
"//tensorflow/python:errors",
"//tensorflow/python:variables",
"//tensorflow/python/eager:test",
"@absl_py//absl/testing:parameterized",
],
)
distribute_py_test(
name = "minimize_loss_test",
srcs = ["minimize_loss_test.py"],
......
......@@ -26,6 +26,7 @@ import numpy as np
from tensorflow.python import keras
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import reduce_util
from tensorflow.python.distribute import strategy_combinations
from tensorflow.python.eager import backprop
from tensorflow.python.eager import def_function
......@@ -448,6 +449,35 @@ class KerasModelsTest(test.TestCase, parameterized.TestCase):
train_step(input_iterator)
@combinations.generate(
    combinations.combine(
        distribution=strategy_combinations.all_strategies, mode=["eager"]))
def test_reduce_loss(self, distribution):
  """Smoke-tests `distribution.reduce(MEAN, ..., axis=0)` on a per-example loss.

  Builds a single batch of ten all-zero examples, runs one forward pass of a
  small `Input -> Dense(3)` model through `distribution.run` inside a
  `tf.function`, and reduces the resulting sparse categorical crossentropy
  loss over axis 0. The test makes no assertions on the value; it passes as
  long as the run and the reduction complete without raising.

  Args:
    distribution: The strategy under test, supplied by
      `combinations.generate`.
  """
  inputs = np.zeros((10, 4), dtype=np.float32)
  targets = np.zeros((10, 1), dtype=np.float32)
  dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
  dataset = dataset.batch(10, drop_remainder=False)
  input_iterator = iter(distribution.experimental_distribute_dataset(dataset))

  with distribution.scope():
    # `shape` must be a tuple; `(4)` is just the integer 4.
    x = keras.layers.Input(shape=(4,), name="input")
    y = keras.layers.Dense(3, name="dense")(x)
    model = keras.Model(x, y)

  @def_function.function
  def train_step(iterator):

    def step_fn(batch):
      # Named `batch` so it does not shadow the outer `inputs` array.
      images, labels = batch
      outputs = model(images)
      return keras.losses.sparse_categorical_crossentropy(labels, outputs)

    return distribution.run(step_fn, args=(next(iterator),))

  per_replica_loss = train_step(input_iterator)
  # The result is intentionally unused: the call itself is what is under test.
  distribution.reduce(reduce_util.ReduceOp.MEAN, per_replica_loss, axis=0)
@combinations.generate(
combinations.combine(
distribution=strategy_combinations.tpu_strategies, mode=["eager"]))
......
......@@ -114,6 +114,7 @@ from tensorflow.python.distribute import distribution_strategy_context
from tensorflow.python.distribute import numpy_dataset
from tensorflow.python.distribute import reduce_util
from tensorflow.python.eager import context as eager_context
from tensorflow.python.eager import def_function
from tensorflow.python.eager import monitoring
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
......@@ -628,6 +629,10 @@ class StrategyBase(object):
# a sensible value.
extended._retrace_functions_for_each_device = True
# Per-axis caches mapping `axis` (int) -> `tf.function`, so that each wrapped
# reduction helper is only created once per axis.
self._mean_reduce_helper_fns = {}
self._reduce_sum_fns = {}
@property
def extended(self):
"""`tf.distribute.StrategyExtended` with additional methods."""
......@@ -1014,8 +1019,25 @@ class StrategyBase(object):
if axis is None:
return self._extended._reduce(reduce_op, value) # pylint: disable=protected-access
if reduce_op == reduce_util.ReduceOp.SUM:
value = self.run(
lambda v: math_ops.reduce_sum(v, axis=axis), args=(value,))
def reduce_sum(v):
return math_ops.reduce_sum(v, axis=axis)
if eager_context.executing_eagerly():
# Some strategies (e.g. TPUStrategy) don't support pure eager
# execution, so wrap `reduce_sum_fn` in a `tf.function` so that it can be
# run from eager mode. Cache the `tf.function` by `axis` to avoid
# tracing the same function again.
if axis not in self._reduce_sum_fns:
def reduce_sum_fn(v):
return self.run(reduce_sum, args=(v,))
self._reduce_sum_fns[axis] = def_function.function(reduce_sum_fn)
value = self._reduce_sum_fns[axis](value)
else:
value = self.run(reduce_sum, args=(value,))
return self._extended._reduce(reduce_op, value) # pylint: disable=protected-access
if reduce_op != reduce_util.ReduceOp.MEAN:
raise TypeError("Expected `reduce_op` to be a `tf.distribute.ReduceOp`, "
......@@ -1062,7 +1084,22 @@ class StrategyBase(object):
# reduce is complete?
return numer, denom
numer, denom = self.run(mean_reduce_helper, args=(value,))
if eager_context.executing_eagerly():
# Some strategies (e.g. TPUStrategy) don't support pure eager
# execution, so wrap `mean_reduce_helper` in a `tf.function` so that it can
# be run from eager mode. Cache the `tf.function` by `axis` to avoid
# tracing the same function again.
if axis not in self._mean_reduce_helper_fns:
def mean_reduce_fn(v):
return self.run(mean_reduce_helper, args=(v,))
self._mean_reduce_helper_fns[axis] = def_function.function(
mean_reduce_fn)
numer, denom = self._mean_reduce_helper_fns[axis](value)
else:
numer, denom = self.run(mean_reduce_helper, args=(value,))
# TODO(josh11b): Should batch reduce here instead of doing two.
numer = self._extended._reduce(reduce_util.ReduceOp.SUM, numer) # pylint: disable=protected-access
denom = self._extended._reduce(reduce_util.ReduceOp.SUM, denom) # pylint: disable=protected-access
......
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for `strategy.reduce`."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl.testing import parameterized
from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import reduce_util
from tensorflow.python.distribute import strategy_combinations
from tensorflow.python.eager import def_function
from tensorflow.python.eager import test
from tensorflow.python.framework import constant_op
class StrategyReduceTest(test.TestCase, parameterized.TestCase):
  """Exercises `strategy.reduce` with an explicit `axis` in eager mode."""

  @combinations.generate(
      combinations.combine(
          distribution=strategy_combinations.all_strategies,
          mode=["eager"]))
  def test_reduce_with_axis(self, distribution):
    """Reduces a per-replica `[1., 2.]` tensor over axis 0 with MEAN and SUM."""

    @def_function.function
    def replica_fn():
      # Every replica produces the same two-element constant.
      return constant_op.constant([1., 2.])

    per_replica = distribution.run(replica_fn)

    # MEAN over axis 0: (1 + 2) / 2.
    mean_result = distribution.reduce(
        reduce_util.ReduceOp.MEAN, per_replica, axis=0)
    self.assertEqual(1.5, self.evaluate(mean_result))

    # SUM over axis 0: each replica contributes 1 + 2 = 3.
    sum_result = distribution.reduce(
        reduce_util.ReduceOp.SUM, per_replica, axis=0)
    expected_sum = 3 * distribution.num_replicas_in_sync
    self.assertEqual(expected_sum, self.evaluate(sum_result))
# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
test.main()
......@@ -897,7 +897,7 @@ class TPUExtended(distribute_lib.StrategyExtendedV1):
if tensor_util.is_tensor(input_tensor):
rank = input_tensor.get_shape().rank
else:
rank = np.rank(input_tensor)
rank = np.ndim(input_tensor)
maximum_shape = tensor_shape.TensorShape([None] * rank)
maximum_shapes.append(maximum_shape)
maximum_shapes = nest.pack_sequence_as(replicate_inputs[0],
......