# create_index.py
# imports from python libraries
from os import path as OS_PATH
import time
from sys import exc_info as EXC_INFO

6
# imports from core django libraries
7
from django.core.management.base import BaseCommand
8 9 10


# imports from third-party app(s)
11
from django_mongokit.document import model_names
12
from mongokit.database import Database as MK_Database
13 14 15 16 17 18

try:
    from bson import ObjectId
except ImportError:  # old pymongo
    from pymongo.objectid import ObjectId

19
# imports from project's app(s)
20 21 22 23 24 25 26
from gnowsys_ndf.ndf.models import Node
from gnowsys_ndf.ndf.models import AttributeType, RelationType, MetaType, ProcessType, GSystemType
from gnowsys_ndf.ndf.models import Benchmark, Analytics
from gnowsys_ndf.ndf.models import GSystem, File, Group, Author
from gnowsys_ndf.ndf.models import Triple, GAttribute, GRelation
from gnowsys_ndf.ndf.models import ReducedDocs, ToReduceDocs, IndexedWordList
from gnowsys_ndf.ndf.models import node_holder
27 28
from gnowsys_ndf.ndf.models import db, node_collection, triple_collection, filehive_collection, counter_collection, benchmark_collection, filehive_collection, buddy_collection
from gnowsys_ndf.ndf.models import Filehive, Buddy, Counter
29 30 31 32 33 34 35 36 37 38 39 40

from gnowsys_ndf.ndf.models import INDEX_ASCENDING

####################################################################################################################

# Directory (next to this module) in which the index-creation log is written.
SCHEMA_ROOT = OS_PATH.join(OS_PATH.dirname(__file__), "schema_files")

# Messages collected during the run; they are flushed to the log file at the
# end of the command.  `log_list_append` is a bound-method shortcut.
log_list = []
log_list_append = log_list.append

# Banner marking the start of this run, both on screen and in the log.
info_message = "\n######### Script run on : " + time.strftime("%c") + " #########\n" \
    + "############################################################"
log_list_append("\n" + info_message)
# Single-argument call form prints identically under the Python 2 print
# statement and the Python 3 print function.
print(info_message)


45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
def get_index_name(index_fields_list):
    """Return the index name and the normalised index field list.

    Every entry of ``index_fields_list`` that is not already a
    ``(field-name, index-order)`` tuple is replaced by
    ``(entry, INDEX_ASCENDING)``.

    Args:
        index_fields_list: Iterable of field names and/or
            ``(field-name, index-order)`` tuples.  A list is normalised in
            place; any other iterable is copied into a new list first.

    Returns:
        A ``(index_name, fields)`` tuple where ``index_name`` is the
        ``<field>_<order>`` pairs joined with ``_`` (trailing underscores
        stripped) and ``fields`` is the normalised list.
    """
    if isinstance(index_fields_list, list):
        fields = index_fields_list
    else:
        fields = list(index_fields_list)

    name_parts = []
    for position, field in enumerate(fields):
        if not isinstance(field, tuple):
            # Bare field name: default to ascending order.
            field = (field, INDEX_ASCENDING)
            fields[position] = field
        name_parts.append("{0}_{1}_".format(field[0], field[1]))

    # Remove trailing '_'
    return "".join(name_parts).rstrip('_'), fields


def ensure_collection(db_obj, collection_name):
    """Create a collection in the given database if it doesn't exist yet.

    Args:
        db_obj: Database object; must be a mongokit ``Database`` instance.
        collection_name: Name of the collection, as a string.

    Returns:
        A status message saying whether the collection was created by this
        call or was already present.

    Raises:
        TypeError: If ``db_obj`` is not a mongokit ``Database`` instance.
            (``TypeError`` is an ``Exception`` subclass, so callers that
            catch ``Exception`` keep working.)
    """
    if not isinstance(db_obj, MK_Database):
        raise TypeError('\nDatabaseObject (ensure_collection): Invalid database object found!\n')

    if collection_name in db_obj.collection_names():
        return "\nCollection creation: {0}... already created.".format(collection_name)

    db_obj.create_collection(collection_name)
    return "\nCollection creation: {0}... created successfully".format(collection_name)


80 81 82 83 84 85 86 87
class Command(BaseCommand):
    """Create the index(es) declared by every model listed in ``model_names``.

    For each model the command makes sure its collection exists, reads the
    model's ``indexes`` declarations and creates every index that is not
    present yet.  Progress and error messages are printed and queued in
    ``log_list``, which is appended to ``ensure_index.log`` under
    ``SCHEMA_ROOT`` when the command finishes.
    """
    help = "This script helps in creating/updating index(es) for a collection."

    @staticmethod
    def _report(message, pause=0):
        """Print `message`, queue it for the log file, then sleep `pause` seconds."""
        print(message)
        log_list_append("\n" + message)
        if pause:
            time.sleep(pause)

    def handle(self, *args, **options):
        """Ensure collections and indexes for all models; write the run log.

        Errors are reported and logged rather than raised: a failure while
        inspecting one model or creating one index skips only that item.
        """
        try:
            # Iterate through all class(es) defined in models file
            # And create index(es) defined in "indexes" field
            # defined within each class definition
            print("\nFollowing are the model(s) defined: \n{0}".format(
                ', '.join(name_tuple[0] for name_tuple in model_names)))

            # Maps a model's `collection_name` to the collection object on
            # which its indexes are inspected and created.
            collection_object_wrapper = {
                'Nodes': node_collection.collection,
                'Triples': triple_collection.collection,
                'Benchmark': benchmark_collection,
                'Filehive': filehive_collection,
                'Buddy': buddy_collection,
                'Counter': counter_collection
            }

            # collection name -> index field tuples declared by the most
            # recently processed model that uses that collection.
            collection_index_dict = {}

            for each_class_tuple in model_names:
                time.sleep(0.2)
                indexes_to_create = []
                gstudio_collection_name = None

                try:
                    # NOTE(review): eval() resolves the model class by name in
                    # this module's namespace; the names come from the
                    # django_mongokit registry, not from user input.  It sits
                    # inside the try so that one unresolved name only skips
                    # that model instead of aborting the whole run.
                    class_variable = eval(each_class_tuple[0])

                    gstudio_collection_name = class_variable.collection_name
                    self._report(ensure_collection(db, gstudio_collection_name))

                    time.sleep(0.2)
                    self._report("\nCreating index(es) for {0}".format(class_variable))

                    time.sleep(0.2)
                    self._report("Collection name: {0}".format(gstudio_collection_name))

                    indexes_defined_for_collection = [
                        tuple(indexes_dict['fields'])
                        for indexes_dict in class_variable.indexes
                    ]

                    indexes_to_create = indexes_defined_for_collection
                    if gstudio_collection_name in collection_index_dict:
                        # Filter already created indexes (declaration order
                        # is kept so the numbering in the log is stable)
                        already_handled = set(
                            tuple(indexes_list) for indexes_list
                            in collection_index_dict[gstudio_collection_name])
                        indexes_to_create = [
                            fields for fields in indexes_defined_for_collection
                            if fields not in already_handled
                        ]

                    # Override latest list of indexes
                    collection_index_dict[gstudio_collection_name] = \
                        indexes_defined_for_collection

                    if not indexes_defined_for_collection:
                        self._report("There is NO index defined for this collection.",
                                     pause=0.2)
                        continue

                    if not indexes_to_create:
                        self._report("Following indexes are already been created...",
                                     pause=0.2)
                        for index_fields_list in indexes_defined_for_collection:
                            index_val, _ = get_index_name(index_fields_list)
                            self._report("  {0}".format(index_val))

                        # As there are no indexes to create for given collection
                        # Continue processing with next collection
                        continue

                except Exception as e:
                    self._report("Error in line #{0} ({1}) : {2} !!!".format(
                        EXC_INFO()[-1].tb_lineno, each_class_tuple[1], str(e)))
                    continue

                # Iterate through various index field-name(s) defined or
                # field-tuple(s) [i.e. (field-name, indexing-order)] defined as
                # part of index-field-list in a given collection
                for i, index_fields_list in enumerate(indexes_to_create):
                    time.sleep(0.2)
                    if len(index_fields_list) > 1:
                        index_type = "Compound index"
                    else:
                        index_type = "Single index"

                    self._report(
                        "#{0} >>> Creating {1} on following field(s): {2}".format(
                            (i + 1), index_type, index_fields_list),
                        pause=0.4)

                    # If only field-name is provided in the list
                    # Then reformat it in format as (field-name, index-order)
                    declared_fields = list(index_fields_list)
                    index_val, index_fields_list = get_index_name(index_fields_list)

                    # Report the normalisation only when it changed something.
                    if index_fields_list != declared_fields:
                        self._report(
                            "Index field(s) list updated... {0}".format(index_fields_list),
                            pause=0.2)

                    # Set collection-object based on collection_name defined for
                    # given collection in models before creating/updating index
                    # e.g. gstudio_collection = node_collection.collection
                    try:
                        time.sleep(0.2)
                        gstudio_collection = collection_object_wrapper[gstudio_collection_name]
                    except KeyError:
                        # Both literals are joined before .format() runs, so
                        # the line number placeholder is filled in as well.
                        self._report(
                            ("Error in line #{0} (DoesNotExist): Collection-object wrapper for "
                             "collection {1} not found!").format(
                                EXC_INFO()[-1].tb_lineno, gstudio_collection_name))

                        error_message = "IndexError: {0}... not created/updated!!!" \
                            .format(index_val)
                        log_list_append("\n" + error_message)
                        print("\n" + error_message)
                        continue

                    # Create/Update index
                    if index_val in gstudio_collection.index_information():
                        self._report("{0}... index has already been created!".format(index_val))
                        continue

                    try:
                        index_val = gstudio_collection.ensure_index(index_fields_list)
                        if index_val:
                            info_message = "{0}... index created successfully." \
                                .format(index_val)
                        else:
                            info_message = "Index NOT created!"
                        self._report(info_message)
                    except Exception as e:
                        self._report("Error in line #{0}: {1}!!!".format(
                            EXC_INFO()[-1].tb_lineno, str(e)))

        except Exception as e:
            self._report("Error in line #{0}: {1}!!!".format(
                EXC_INFO()[-1].tb_lineno, str(e)))

        finally:
            if log_list:
                time.sleep(0.6)

                # Queue the completion banner before flushing so that it is
                # part of what gets written to the log file.
                info_message = "\nIndex Creation Completed ########################\n"
                log_list_append("\n" + info_message)

                log_file_path = OS_PATH.join(SCHEMA_ROOT, "ensure_index.log")
                with open(log_file_path, 'a') as log_file:
                    log_file.writelines(log_list)

                print(info_message)
        # --- End of handle() ---