#!/usr/bin/env python3
"""
Distributed single frame render for Blender Cycles

do not set multiprocessing.set_start_method() and let default behaviour
prevail. this will use 'fork' on non-Windows platforms, and only use 'spawn'
(with its slow process startup characteristics) on Windows where 'fork' isn't
available.

------------------------------------------------------------------------------
Copyright 2015-2017 Alan Taylor

This file is part of dtr.

dtr is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

dtr is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with dtr.  If not, see <http://www.gnu.org/licenses/>.
------------------------------------------------------------------------------
"""

Alan Taylor's avatar
Alan Taylor committed
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
import math                     # ceil, log
import multiprocessing as mp    # Process, Queue
import os                       # path.isfile
import queue                    # Empty exception
import subprocess               # DEVNULL, Popen
import sys                      # exit, version
import time                     # sleep, time

import dtr_analysis as ana      # create_heatmap, display_basic_render_info
import dtr_benchmark as bm      # display_bench_cache
import dtr_data_struct as ds    # FILENAME_CONFIG_BLENDER
                                # FILENAME_FOR_RENDER
                                # FILENAME_RESTART_CONFIG
                                # FILENAME_RESTART_PROGRESS
                                # Production, UserExit
import dtr_file_io as fio       # backup_render,
                                # create_final_image_from_blocks,
                                # delete_blocks, distribute_files_to_node,
                                # progress_write, tidy_up_temporary_files
import dtr_init as init         # check_arguments, display_render_details,
                                # normal_start, restart_interrupted_render
import dtr_utils as utils       # block_render_filename, check_python_version
                                # despatch_order, node_alive,
                                # package_filename_for_cygwin


##############################################################################
# process support
##############################################################################

def replace_reorder(image_config, pipeline, block_num):
    """
    put a failed block back on the render queue in despatch order

    a block has to be queued again when either its render could not be
    completed or the completed block could not be fetched from its node -
    both usually occur because of network outages or a node otherwise going
    down. rendering the salvaged block out of order would leave a preview
    image with a missing block (most probably) in the region of interest.

    to avoid that, the queue of pending blocks is drained, the failed block
    is added, the despatch order is applied again and the queue is refilled.
    all of this happens while holding pipeline.lock.

    due to the nature of the failure, this function should be called very
    rarely
    --------------------------------------------------------------------------
    args
        image_config : image_config dictionary
            contains core information about the image to be rendered
        pipeline : instance of class Production
            contains all the queues through which the production pipeline
            processes communicate
        block_num : int
            block number to be rendered again
    --------------------------------------------------------------------------
    returns : none
    --------------------------------------------------------------------------
    """
    with pipeline.lock:
        # drain the queue of blocks that are still waiting to be rendered
        pending = set()
        while not pipeline.blocks.empty():
            pending.add(pipeline.blocks.get())

        # the failed block joins the blocks still to be rendered
        pending.add(block_num)

        # apply the despatch order given by the user (the list is handed to
        # utils.despatch_order and then used as-is to refill the queue)
        ordered = list(pending)
        utils.despatch_order(image_config, ordered)

        # put everything back on the queue
        for pending_block in ordered:
            pipeline.blocks.put(pending_block)
Alan Taylor's avatar
Alan Taylor committed
99

Alan Taylor's avatar
Alan Taylor committed
100
def collect_block_from_node(image_config, block_info):
    """
    copy a rendered block from a render node into the current directory

    the copy is made with scp, and this function blocks until scp exits

    scp options:
    -q    quiet mode
    --------------------------------------------------------------------------
    args
        image_config : image_config dictionary
            contains core information about the image to be rendered
        block_info : list [x, y, z]
            where:
            x = instance of RenderNode, the node we are collecting from
            y = int, block number
            z = int, time it took to render block in seconds
    --------------------------------------------------------------------------
    returns
        bool
            true if scp exited with status 0, false otherwise
    --------------------------------------------------------------------------
    """
    node = block_info[0]

    # scp source specification: user@host:filename
    host = node.username + '@' + node.ip_address
    source = host + ':' + utils.block_render_filename(image_config, block_info[1])

    command = ['scp', '-q', source, '.']

    # scp output is discarded, only the exit status is of interest
    with subprocess.Popen(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL) as scp:
        scp.wait()

    return scp.returncode == 0
Alan Taylor's avatar
Alan Taylor committed
129

Alan Taylor's avatar
Alan Taylor committed
130
def render_block(image_config, node, block):
    """
    render a block on a remote node, waiting until the render has finished

    run asynchronously using multiprocessing.Process from
    asynchronous_render()

    ---

    This function maintains long running ssh sessions, and some specific
    behaviour has been noticed during network outages:

    (1) The network can disconnect for short periods (for example, a few
    minutes) without any effect on performance.

    (2) If the network disconnects for a long period of time however
    (for example, over 20 minutes), proc.wait() may block permanently
    and the render node concerned will receive no further blocks to render
    until the script is halted then restarted, even if connectivity is
    restored.

    ---

    For the case of (2) above, this behaviour can be modified by the use of
    ServerAliveInterval and ServerAliveCountMax to allow ssh to exit cleanly
    before the point is reached when proc.wait() blocks permanently.
    See the man page for ssh_config for further information.

    However, such early termination means that it is probable that the
    existing Blender process will remain running on the remote node, and it
    will still be running when the next block is sent to the node.  Two
    concurrent renders running on the same node will not interfere with each
    other, but the initial render process will produce no useful output, and
    will simply slow down the new render process.

    This negative effect can be sidestepped using 'ssh -t' which causes ssh
    to kill the remote blender render on exit, but this consistently messes
    up the terminal configuration, stopping carriage returns being echoed.
    This effect can be corrected with 'stty sane', but this is not effective
    in all cases.

    --------------------------------------------------------------------------
    args
        image_config : image_config dictionary
            contains core information about the image to be rendered
        node : RenderNode instance
            contains information about the node the block is rendered on
        block : int
            the number of the block to be rendered
    --------------------------------------------------------------------------
    returns
        bool
            True if ssh exited with status 0, False otherwise
    --------------------------------------------------------------------------
    """
    login = node.username + '@' + node.ip_address

    # the per-block Blender configuration script carries the zero padded
    # block number in its filename
    padded = str(block).zfill(image_config['padding'])

    blend_file = utils.package_filename_for_cygwin(
        node.opsys, ds.FILENAME_FOR_RENDER)
    config_script = utils.package_filename_for_cygwin(
        node.opsys, ds.FILENAME_CONFIG_BLENDER + padded + '.py')

    # command line executed on the remote node
    remote_command = node.binloc + ' -b ' + blend_file + \
        ' --python ' + config_script

    # force exit approximately 6 minutes after the network has gone down,
    # this will NOT kill the Blender instance running on the remote node,
    # even when the network outage ends
    ssh_command = [
        'ssh',
        '-o', 'ServerAliveInterval=360',
        '-o', 'ServerAliveCountMax=1',
        login,
        remote_command,
    ]

    with subprocess.Popen(
            ssh_command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL) as ssh:
        ssh.wait()

    return ssh.returncode == 0


##############################################################################
# start up and shut down processes
##############################################################################

def start_processes(nodes, image_config, rpipe):
    """
    create and start the message, render and collect processes

    the message process is started first. one render process is created per
    node, followed by the collect processes: a single collect process when
    there are fewer than two nodes, otherwise ceil(ln(number of nodes)).
    render and collect processes are then started in that order.

    --------------------------------------------------------------------------
    args
        nodes : list of RenderNode instances
            contains information about all the nodes in the cluster
        image_config : image_config dictionary
            contains core information about the image to be rendered
        rpipe : instance of class Production
            contains all the queues through which the production pipeline
            processes communicate
    --------------------------------------------------------------------------
    returns
        parfun : list of multiprocessing.context.Process
            references for all render and collect processes
        info : multiprocessing.context.Process
            reference for message process
    --------------------------------------------------------------------------
    """
    # the message display process runs before anything else is started
    info = mp.Process(target=message, args=(rpipe,))
    info.start()

    # one render process per node
    parfun = [
        mp.Process(target=render, args=(image_config, node, rpipe))
        for node in nodes
    ]

    # an appropriate number of collect processes for the cluster size
    num_nodes = len(nodes)
    if num_nodes < 2:
        num_collectors = 1
    else:
        num_collectors = math.ceil(math.log(num_nodes))
    parfun.extend(
        mp.Process(target=collect, args=(image_config, rpipe))
        for _ in range(num_collectors)
    )

    # start up render and collect processes
    for process in parfun:
        process.start()

    return parfun, info
Alan Taylor's avatar
Alan Taylor committed
254

Alan Taylor's avatar
Alan Taylor committed
255
def stop_processes(rpipe, parfun, info):
    """
    shut down the render, collect and message processes

    the render and collect processes are signalled and joined first; the
    message process is only signalled and joined once they have all exited.

    --------------------------------------------------------------------------
    args
        rpipe : instance of class Production
            contains all the queues through which the production pipeline
            processes communicate
        parfun : list of multiprocessing.context.Process
            references for all render and collect processes
        info : multiprocessing.context.Process
            reference for message process
    --------------------------------------------------------------------------
    returns : none
    --------------------------------------------------------------------------
    """
    # anything placed on the terminate queue tells the render and collect
    # processes to leave their loops; wait for every one of them to exit
    rpipe.terminate.put(True)
    for worker in parfun:
        worker.join()

    # now tell the message process that it can terminate, and wait for it
    rpipe.terminate_info.put(True)
    info.join()


##############################################################################
# processes
##############################################################################

def render(image_config, node, pipeline):
    """
    despatches blocks to be rendered on a given node

    checks the node is responsive before attempting to despatch a block;
    an unresponsive node is checked again after a 10 second wait

    it is a very rare case that rendering the block will fail (the render
    process may have manually been killed on the remote node for some
    reason), in this case the block is handed to replace_reorder() so that
    it is put back on the queue of blocks to be rendered

    it is also a rare case that the call to node_alive will find the node
    down simply because the code spends the vast majority of its time
    elsewhere
    --------------------------------------------------------------------------
    args
        image_config : image_config dictionary
            contains core information about the image to be rendered
        node : RenderNode instance
            contains information about the node blocks are despatched to
        pipeline : instance of class Production
            contains all the queues through which the production pipeline
            processes communicate
    --------------------------------------------------------------------------
    returns : none
        no explicit return, data transfer via pipeline queues
    --------------------------------------------------------------------------
    """
    # exit cleanly if anything is placed on the terminate queue
    while pipeline.terminate.empty():

        if not utils.node_alive(node):
            # the node was unresponsive when checked,
            # so wait a while before checking again
            time.sleep(10)
            continue

        # get a free block, giving up after a second so the terminate queue
        # is checked regularly
        try:
            with pipeline.lock:
                block_num = pipeline.blocks.get(timeout=1)
        except queue.Empty:
            continue

        padded = str(block_num).rjust(image_config['padding'])
        pipeline.inform.put(
            'block ' + padded + ' issued to node ' + node.ip_address)

        fio.distribute_files_to_node(image_config, node, block_num)

        started = time.time()
        if render_block(image_config, node, block_num):
            # pass the block on for collection, with its render time in
            # whole seconds
            elapsed = int(time.time() - started)
            pipeline.collect.put([node, block_num, elapsed])
            continue

        # handle rare failure mode gracefully
        replace_reorder(image_config, pipeline, block_num)
        pipeline.inform.put(
            'problem completing render of block ' + str(block_num) +
            ' on node ' + node.ip_address +
            ' - it will be rendered again')
Alan Taylor's avatar
Alan Taylor committed
345

Alan Taylor's avatar
Alan Taylor committed
346
def collect(image_config, pipeline):
    """
    collect rendered blocks from nodes

    collection typically happens at the same time as the node is starting to
    build the scene for its next block render, a phase of the render process
    that does not typically fully utilise a multicore processor.

    a block that is retrieved is reported on pipeline.check as
    [node ip address, block number, render time]. a block that cannot be
    retrieved is handed to replace_reorder() so that it is rendered again.
    in both cases a message for the user is placed on pipeline.inform.

    --------------------------------------------------------------------------
    args
        image_config : image_config dictionary
            contains core information about the image to be rendered
        pipeline : instance of class Production
            contains all the queues through which the production pipeline
            processes communicate
    --------------------------------------------------------------------------
    returns : none
        no explicit return, data transfer via pipeline queues
    --------------------------------------------------------------------------
    """
    # exit cleanly if anything is placed on the terminate queue
    while pipeline.terminate.empty():
        # wait at most a second for work so the terminate queue is checked
        # regularly
        try:
            block_info = pipeline.collect.get(timeout=1)
        except queue.Empty:
            continue

        block_num_padded = str(block_info[1]).rjust(image_config['padding'])
        nodeip = block_info[0].ip_address

        if collect_block_from_node(image_config, block_info):
            pipeline.check.put([nodeip, block_info[1], block_info[2]])
            pipeline.inform.put(
                'block ' + block_num_padded + ' retrieved from ' + nodeip)
        else:
            # handle rare failure mode gracefully
            replace_reorder(image_config, pipeline, block_info[1])
            pipeline.inform.put(
                'block ' + block_num_padded +
                ' could not be retrieved from ' + nodeip +
                ' and will be rendered again')
Alan Taylor's avatar
Alan Taylor committed
384

Alan Taylor's avatar
Alan Taylor committed
385
def message(pipeline):
    """
    print messages for user

    other processes put items on pipeline.inform, this is the only function
    that gets items from it. once something has been placed on
    pipeline.terminate_info the function returns, but not before
    pipeline.inform reports that it is empty, so pending messages are still
    printed.

    --------------------------------------------------------------------------
    args
        pipeline : instance of class Production
            contains all the queues through which the production pipeline
            processes communicate
    --------------------------------------------------------------------------
    returns : none
    --------------------------------------------------------------------------
    """
    while True:
        # stop only when asked to terminate AND nothing is left to print
        if not pipeline.terminate_info.empty() and pipeline.inform.empty():
            break

        # wait at most a second so the terminate request is noticed promptly
        try:
            text = pipeline.inform.get(timeout=1)
        except queue.Empty:
            continue

        print(text)


##############################################################################
# startup
##############################################################################

def _default_image_config():
    """
    return a new dictionary holding the default image render configuration

    the options that may be safely set by the user can be changed in file
    dtr_user_settings.txt
    """
    return {
        # USER SETTINGS
        'image_x': -1,                # image size, x axis
        'image_y': -1,                # image size, y axis
        'seed': 0,                    # Blender cycles noise seed
        'frame': 1,                   # frame number to be rendered
        'blocks_user': 0,             # give the user the ability to raise the number of blocks
        'textures_directory': '',     # textures associated with the render file
        'library_directory': '',      # library blender files associated with the render file
        'auto_backup': '',            # back up in-progress render to another machine using rsync
        'remove_alpha': False,        # remove alpha channel from final image
        'render_order': 'CENTRE',     # the order the blocks are rendered in
        'spatial_splits': False,      # use spatial splits: longer build time, faster render
        'heatmap': False,             # flag indicating whether to generate a heatmap post render
        'filetype': 'OPEN_EXR',       # filetype to render to

        # RESERVED SETTINGS
        'blocks_x': -1,               # number of blocks, x axis
        'blocks_y': -1,               # number of blocks, y axis
        'blocks_required': -1,        # total number of blocks required (blocks_x * blocks_y)
        'padding': -1,                # size of block numbers field
        'tile_size_x': 16,            # x axis size of Blender tile in pixels
        'tile_size_y': 16,            # y axis size of Blender tile in pixels
        'min_tiles_per_block': 16,    # n tiles per block, tile = tile_size_x * tile_size_y pixels
        'max_threads': 8,             # = number of cores (for cpus without SMT),
                                      #     = number of cores * 2 (for cpus with SMT)
                                      #     the value used should be the highest obtained
                                      #     from all the render nodes used
        'max_blocks': 1024            # when the image to be rendered is very large,
                                      #     limit the number of blocks to something sane
    }


def main():
    """
    distributed render

    establishes the configuration (either a normal start, or a restart of an
    earlier interrupted render when both restart files exist), starts the
    production pipeline processes, records each block reported on the check
    queue, shuts the processes down and finally assembles the image.

    processes are used for render() as at least one process will be CPU bound
    if we are rendering on the machine this script is running on.

    processes are also used for collect() as tests using threads showed slow
    (though still successful) recovery from temporary network disruptions
    """
    script_start = time.time()

    # basic configuration data for image render
    image_config = _default_image_config()

    # check any command line arguments supplied by the user
    cl_args = init.check_arguments(image_config)

    # check if we are restarting from an earlier interrupted render
    restarting = os.path.isfile(ds.FILENAME_RESTART_CONFIG) \
        and os.path.isfile(ds.FILENAME_RESTART_PROGRESS)
    if restarting:
        (available_render_nodes, image_config, blocks_to_render, checked,
         time_taken_by_previous_renders) = \
            init.restart_interrupted_render(cl_args)
    else:
        available_render_nodes = []
        checked = []
        blocks_to_render, time_taken_by_previous_renders = \
            init.normal_start(available_render_nodes, image_config)

    # settings have now been established, process deferred command line option
    if cl_args.map:
        image_config['heatmap'] = True

    # let the user know which nodes are active, and what their performance is
    bm.display_bench_cache(available_render_nodes)

    ##########################################################################
    # render
    ##########################################################################

    init.display_render_details(available_render_nodes, image_config)

    print('>> rendering')

    # define data structures for processes to use
    rpipe = ds.Production()

    # fill queue with blocks to be rendered
    for block in blocks_to_render:
        rpipe.blocks.put(block)

    # start up processes
    parfun, info = start_processes(available_render_nodes, image_config, rpipe)

    # main loop - check that all the block transfers have completed
    user_action = ds.UserExit()
    while len(checked) < image_config['blocks_required'] \
            and not user_action.interrupt:
        # wait at most a second so user_action.interrupt is checked regularly
        try:
            block_info = rpipe.check.get(timeout=1)
        except queue.Empty:
            continue

        # record the block, then save progress and back up the render
        checked.append(block_info)
        time_taken = \
            time_taken_by_previous_renders + time.time() - script_start
        fio.progress_write((available_render_nodes, checked, time_taken))
        fio.backup_render(image_config)

    # shut down processes
    stop_processes(rpipe, parfun, info)

    if user_action.interrupt:
        sys.exit('\nexiting: blender instances may still be running on render nodes')

    # every block has been checked: build the final image and finish up
    fio.create_final_image_from_blocks(image_config)
    ana.create_heatmap(image_config, checked)
    fio.backup_render(image_config)
    fio.tidy_up_temporary_files(available_render_nodes, image_config)
    ana.display_basic_render_info(script_start, time_taken_by_previous_renders, checked)


##############################################################################
if __name__ == '__main__':

    # the Python version is checked before anything else is done,
    # the script exits if it is too old
    utils.check_python_version()

    # the version check passed, carry out the distributed render
    main()