map_to_ontology.py 5.28 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from collections import defaultdict
from difflib import get_close_matches

from anytree import LevelOrderIter

from src.constants import ontology, column

# Default name of the dataframe column read by get_precise_category and
# get_general_category (their `category_column` default).
# NOTE(review): presumably a temporary column holding ontology nodes that is
# created by the caller -- confirm against the calling pipeline.
TMP_CATEGORY = "tmp_category"

# Reference table: ontology category -> list of modality strings that belong
# to it.  The table is reversed by get_strict_mapper_to_category() so that a
# value of the DETAIL column can be looked up directly.
# NOTE(review): the modalities look like upper-cased, accent-stripped text
# (some entries such as 'HA BERGEMENT' appear to be mis-encoded variants kept
# on purpose) -- confirm the normalization applied upstream.
# Every entry must end with a comma: two adjacent string literals are silently
# concatenated by Python into a single, wrong modality.
DICT_CATEGORY_TO_MODALITIES = {
    ontology.TRAVEL_AND_HOSPITALITY: [
        'HEBERGEMENT RESTAURATION TRANSPORT',
    ],
    ontology.HOSPITALITY: [
        'HOSPITALITE',
        'FRAIS D HOSPITALITE',
        'HOSPITALITE REUNION PROFESSIONNELLE',
    ],
    ontology.MEAL_AND_DRINK: [
        'REPAS',
        'RESTAURATION',
        'FRAIS DE BOUCHE',
        'INVITATION REPAS',
        'REPAS PROFESSIONNEL',
        'REPAS PROFESSIONNELS',
        'FRAIS DE REPAS',

        'DINER',
        'INVITATION A DEJEUNER DINER',
        'RESTAURATION DINER',
        'DINER REPAS',

        'FORMATION MEDICALE CONTINUE',

        'REPAS REUNION SCIENTIFIQUE',

        'REPAS REUNION',
        'REPAS MANIFESTATION DE FORMATION',
        'FRAIS DE RESTAURATION RP',
        'FRAIS D HOSPITALITE RP',
        'DINER REUNION',
        'RP',

        'DEJEUNER REUNION',

        'REPAS IMPROMPTU',
        'DEJEUNER',
        'DEJEUNER REPAS',
        'RELATIONS NORMALES DE TRAVAIL',
        'RNT',
        'REPAS RELATIONS NORMALES DE TRAVAIL',
        'REPAS RELATION NORMALE DE TRAVAIL',
        'REPAS RNT',
        'REPAS DE TRAVAIL',
        'DEJEUNER D OPPORTUNITE',
        'REPAS D AFFAIRE',

        'STAFF',
        'RESTAURATION STAFF',
        'REPAS STAFF',
        'HOSPITALITE STAFF',
        'REPAS REUNION HOSPITALIERE',
        'COLLATION STAFF',
        'STAFF REPAS',

        'RESTAURATION BUFFET',
        'TRAITEUR',
    ],
    ontology.TRAVEL: [
        'TRANSPORT',
        'FRAIS DE DEPLACEMENT',
        'FRAIS DE DA A PLACEMENT',
        'FRAIS DE TRANSPORT',
        'ACHEMINEMENT',
        'VOYAGES',
        'FRAIS REMBOURSES TRANSPORT',
    ],
    ontology.ACCOMMODATION: [
        'HEBERGEMENT',
        'HA BERGEMENT',
        'HA A BERGEMENT',
        'HOTEL',
        'HE BERGEMENT',
        'HEBERGEMENT SUR TOUTE LA DUREE DU CONGRES',
        'HOTEL HEBERGEMENT',
        'H BERGEMENT',
    ],
    ontology.EVENT: [
        'PARTICIPATION EVENEMENT',
        'PARTICIPATION EVENEMENT SCIENTIFIQUE',
        'FRAIS DE CONGRES',
        'CONGRES',
        'REPAS CONGRES',

        'INSCRIPTION',
        'FRAIS D INSCRIPTION',
        'INSCRIPTIONS',
        'INVITATIONS INSCRIPTIONS',

        'INVITATIONS',
        'INVITATION',

        'INSCRIPTION EVENEMENT SCIENTIFIQUE',
        'INSCRIPTION CONGRES',
        'INSCRIPTION CONGRES SCIENTIFIQUE',
        'FRAIS D INSCRIPTION CONGRES',

        'HOSPITALITE EVENEMENT SCIENTIFIQUE',
        'HOSPITALITE CONGRES SYMPOSIUM',

        'HOSPITALITE INSCRIPTION',
        'HOSPITALITE CONGRES',
    ],
    ontology.FEE: [
        'ENQUETE',
        'HONORAIRES ETUDES',
        'ENQUETE TELEPHONIQUE',
        'ENQUETE WEB',
        'ENQUETE INTERNET',
        'HONORAIRES',
        'HONORAIRE',
        'HONORAIRES MEDECIN INVESTIGATEUR',
    ],
    ontology.CASH_DONATION: [
        'DONS DE FONCTIONNEMENT',
        'DON DE FONCTIONNEMENT',
    ],
    ontology.DONATION_IN_KIND: [
        'DONS DE MATERIEL',
        # A comma was missing here, which merged the next two entries into
        # the single string 'DON DE MATERIELECHANTILLON'.
        'DON DE MATERIEL',
        'ECHANTILLON',
    ],
    ontology.DONATION: [
        'DON',
    ],
    ontology.GRANT: [
        'SUBVENTION',
    ],
    ontology.GIFT: [
        'CADEAUX',
        'CADEAU',
        'LIVRE',
        'CHAMPAGNE',
        'BOITE DE CHOCOLAT',
    ],
    ontology.RELATED_EXPENSE: [
        'REMBOURSEMENT DE FRAIS',
        'DEDOMMAGEMENT',
        'FRAIS DIVERS',

        'REMBOURSEMENT FRAIS LOGISTIQUE',
    ],
    ontology.TRAINING: [
        'FORMATION',
        'FRAIS DE FORMATION',
    ],
    ontology.EMPTY_AND_OTHER: [
        ontology.NO_INFORMATION,
        '',
    ],
}


def get_reversed_dict(value_to_list):
    """Invert a ``{value: [elements]}`` mapping into ``{element: value}``.

    Entries are visited in the iteration order of ``value_to_list``; if an
    element appears under several values, the last one visited wins.
    """
    return {
        element: value
        for value, elements in value_to_list.items()
        for element in elements
    }


def get_strict_mapper_to_category():
    """Build the exact-match mapping from modality string to ontology category.

    Returns:
        A ``defaultdict`` built from the reversed DICT_CATEGORY_TO_MODALITIES.
        Looking up a modality that is not in the table yields
        ``ontology.UNSUCESSFULL_MAPPING`` (and, as with any defaultdict,
        stores that key in the mapping).
    """
    modality_to_category = get_reversed_dict(DICT_CATEGORY_TO_MODALITIES)
    # The default factory is a lambda so the fallback category is resolved
    # at lookup time rather than when the mapping is built.
    return defaultdict(lambda: ontology.UNSUCESSFULL_MAPPING, modality_to_category)


def get_similarity_mapper_to_category(s_detail):
    """Extend the strict modality mapping with fuzzy matches for ``s_detail``.

    Each distinct string in ``s_detail`` that is not already a known modality
    is compared with the reference modalities using
    ``difflib.get_close_matches`` (best match only, similarity cutoff 0.8).
    When a match is found, the detail is mapped to the matched modality's
    category.

    Args:
        s_detail: object exposing ``unique()`` that yields the detail values
            (used with a dataframe column by ``get_category``).

    Returns:
        The ``defaultdict`` from ``get_strict_mapper_to_category`` enriched
        with the fuzzy-matched details; unmatched values still resolve to the
        mapping's default on lookup.
    """
    mapping = get_strict_mapper_to_category()
    # Snapshot the reference modalities.  A live ``mapping.keys()`` view would
    # grow as details are added below, so later details could match earlier
    # fuzzy matches instead of the reference table, making the result depend
    # on the order of ``unique()``.
    modalities = list(mapping)

    for detail in s_detail.unique():
        # Non-string values (e.g. NaN) cannot be fuzzy-matched, and exact
        # modalities are already mapped.
        if not isinstance(detail, str) or detail in mapping:
            continue
        match = get_close_matches(detail, modalities, n=1, cutoff=0.8)
        if match:
            mapping[detail] = mapping[match[0]]
    return mapping


def get_mapper_to_fist_level_parent(root_node):
    """Map every node below ``root_node`` to its first-level ancestor.

    For each direct child of ``root_node``, every node yielded by
    ``LevelOrderIter(child)`` is associated with that child.  ``root_node``
    itself is not a key of the result.
    """
    return {
        node: first_level_node
        for first_level_node in root_node.children
        for node in LevelOrderIter(first_level_node)
    }


def get_category(df):
    """Return the ``column.DETAIL`` column of ``df`` mapped to ontology categories.

    The mapping is built from the values of that same column by
    ``get_similarity_mapper_to_category``.
    """
    details = df[column.DETAIL]
    detail_to_category = get_similarity_mapper_to_category(details)
    return details.map(detail_to_category)


def get_precise_category(df, category_column=TMP_CATEGORY):
    """Return the ``name_fr`` attribute of each value in ``df[category_column]``."""

    def _name_fr(category):
        return category.name_fr

    categories = df[category_column]
    return categories.map(_name_fr)


def get_general_category(df, category_column=TMP_CATEGORY):
    """Return the ``name_fr`` of the first-level parent of each category.

    Each value of ``df[category_column]`` is looked up in the mapping built by
    ``get_mapper_to_fist_level_parent(ontology.INTEREST)``; a value that is
    not a key of that mapping raises ``KeyError``.
    """
    first_level_parent_of = get_mapper_to_fist_level_parent(ontology.INTEREST)

    def _general_name_fr(category):
        return first_level_parent_of[category].name_fr

    return df[category_column].map(_general_name_fr)