Commit 2e2b43ab authored by Jarod Wang, committed by Mitar
parent 5cc431d0
{
"id": "157f962c-3f42-423e-b479-d985474162d9",
"id": "2b8d0f1e-be76-400e-926b-2efbfd12a135",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-04-12T17:03:46.558669Z",
"context": "EVALUATION",
"created": "2019-05-21T18:34:55.728249Z",
"inputs": [
{
"name": "dataset inputs"
......@@ -80,14 +79,6 @@
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
},
"exclude_columns": {
"type": "VALUE",
"data": [
1,
17,
18
]
}
}
},
......@@ -109,7 +100,17 @@
{
"id": "produce"
}
]
],
"hyperparams": {
"use_semantic_types": {
"type": "VALUE",
"data": true
},
"return_result": {
"type": "VALUE",
"data": "replace"
}
}
},
{
"type": "PRIMITIVE",
......@@ -134,7 +135,7 @@
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/SuggestedTarget"
"https://metadata.datadrivendiscovery.org/types/TrueTarget"
]
}
}
......@@ -143,7 +144,7 @@
"type": "PRIMITIVE",
"primitive": {
"id": "e770fae6-da6d-45f8-86bf-38a121a4e65a",
"version": "3.0.0",
"version": "3.0.1",
"python_path": "d3m.primitives.classification.cover_tree.Fastlvm",
"name": "Nearest Neighbor Classification with Cover Trees"
},
......@@ -193,6 +194,5 @@
}
]
}
],
"pipeline_rank": "1"
}
]
}
\ No newline at end of file
{
"id": "e770fae6-da6d-45f8-86bf-38a121a4e65a",
"version": "3.0.0",
"version": "3.0.1",
"name": "Nearest Neighbor Classification with Cover Trees",
"description": "Classifier implementing the k-nearest neighbors vote using Cover Trees.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.classification.cover_tree.Fastlvm",
......@@ -24,7 +24,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/autonlab/fastlvm.git@c77b1413155f4db3b4dea2f99870d155e58322a0#egg=fastlvm"
"package_uri": "git+https://github.com/autonlab/fastlvm.git@f3229d5190a6eb5f0b3d20bb3e45f20f49feea92#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -216,5 +216,5 @@
}
},
"structural_type": "fastlvm.covertree_classifier.CoverTreeClassifier",
"digest": "cc351533f7f6b7a5546633ef960c991a3c33ab7dd0d2c2b0e1c0cedb4076cc6e"
"digest": "3346d6c52e2ad646d95acd68e85b0b08aa9000ce24c27ff34bad65adca95be3c"
}
{
"id": "0fea2402-abac-4a26-bb56-98d021a0b753",
"id": "216053cd-f72d-445a-bef7-b317bc766fde",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-04-29T17:02:28.819341Z",
"context": "EVALUATION",
"created": "2019-05-21T18:51:09.394944Z",
"inputs": [
{
"name": "dataset inputs"
......@@ -53,7 +52,16 @@
{
"id": "produce"
}
]
],
"hyperparams": {
"parse_semantic_types": {
"type": "VALUE",
"data": [
"http://schema.org/Integer",
"http://schema.org/Float"
]
}
}
},
{
"type": "PRIMITIVE",
......@@ -78,15 +86,8 @@
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
},
"exclude_columns": {
"type": "VALUE",
"data": [
1,
17,
18
"http://schema.org/Integer",
"http://schema.org/Float"
]
}
}
......@@ -143,7 +144,7 @@
"type": "PRIMITIVE",
"primitive": {
"id": "84f39131-6618-4d90-9590-b79d41dfb093",
"version": "2.2.0",
"version": "2.2.1",
"python_path": "d3m.primitives.classification.search.Find_projections",
"name": "find projections"
},
......@@ -187,6 +188,5 @@
}
]
}
],
"pipeline_rank": "1"
]
}
\ No newline at end of file
{
"id": "84f39131-6618-4d90-9590-b79d41dfb093",
"version": "2.2.0",
"version": "2.2.1",
"name": "find projections",
"description": "Class to search for 2-d projection boxes in raw feature space for discrete(categorical) output (for\nclassification problems) . For discrete output, the algorithm tries to find 2-d projection boxes which can\nseparate out any class of data from the rest with high purity.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.classification.search.Find_projections",
......@@ -31,7 +31,7 @@
},
{
"type": "PIP",
"package_uri": "git+https://github.com/autonlab/find_projections.git@c544c40b49d76a0d99c5c5499dd9e5c156610f72#egg=find_projections"
"package_uri": "git+https://github.com/autonlab/find_projections.git@09faa5f89789a0b8ec6b4af7e2e0c979d2fd25b9#egg=find_projections"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -271,5 +271,5 @@
}
},
"structural_type": "find_projections.search_projections.Search",
"digest": "2773fa418f16aae21b7a8ef87bf51805ed8c5afad5d9fae9262308ae56cec77d"
"digest": "261a1a1c5faf7aafeca93b4e6c3c0d10a56405f02f284f496f11cb2e76ee6e7d"
}
{
"id": "ebcde203-46ef-403c-9f99-9e91234a2026",
"id": "e674d53a-8a8a-40dd-b988-7b1f3ffc7cde",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-04-29T18:55:29.543049Z",
"context": "EVALUATION",
"created": "2019-05-21T18:52:13.216586Z",
"inputs": [
{
"name": "dataset inputs"
......@@ -53,7 +52,16 @@
{
"id": "produce"
}
]
],
"hyperparams": {
"parse_semantic_types": {
"type": "VALUE",
"data": [
"http://schema.org/Integer",
"http://schema.org/Float"
]
}
}
},
{
"type": "PRIMITIVE",
......@@ -78,15 +86,8 @@
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
},
"exclude_columns": {
"type": "VALUE",
"data": [
1,
17,
18
"http://schema.org/Integer",
"http://schema.org/Float"
]
}
}
......@@ -143,7 +144,7 @@
"type": "PRIMITIVE",
"primitive": {
"id": "448590e7-8cf6-4bfd-abc4-db2980d8114e",
"version": "2.2.0",
"version": "2.2.1",
"python_path": "d3m.primitives.classification.search_hybrid.Find_projections",
"name": "find projections"
},
......@@ -187,6 +188,5 @@
}
]
}
],
"pipeline_rank": "1"
]
}
\ No newline at end of file
{
"id": "448590e7-8cf6-4bfd-abc4-db2980d8114e",
"version": "2.2.0",
"version": "2.2.1",
"name": "find projections",
"description": "Class to search for 2-d projection boxes in raw feature space for discrete(categorical) output (for\nclassification problems) . For discrete output, the algorithm tries to find 2-d projection boxes which can\nseparate out any class of data from the rest with high purity.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.classification.search_hybrid.Find_projections",
......@@ -31,7 +31,7 @@
},
{
"type": "PIP",
"package_uri": "git+https://github.com/autonlab/find_projections.git@c544c40b49d76a0d99c5c5499dd9e5c156610f72#egg=find_projections"
"package_uri": "git+https://github.com/autonlab/find_projections.git@09faa5f89789a0b8ec6b4af7e2e0c979d2fd25b9#egg=find_projections"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -285,5 +285,5 @@
}
},
"structural_type": "find_projections.search_projections_hybrid.SearchHybrid",
"digest": "aaa898aa9e6d2fe28abda7f55508e1f0bdcc076889594c0869d9b6e0f3332bad"
"digest": "70411b701ff36f1b65823a29319720cda82b39cd39b830ccb8936102ecd827bc"
}
{
"id": "f6f8017d-080f-4b88-8dfb-a6b578f96bea",
"id": "02e413d7-f5d0-4d5e-90cd-64fb513c9101",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-04-12T18:22:22.773009Z",
"context": "EVALUATION",
"created": "2019-05-21T18:42:36.780590Z",
"inputs": [
{
"name": "dataset inputs"
......@@ -80,12 +79,6 @@
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
},
"exclude_columns": {
"type": "VALUE",
"data": [
65
]
}
}
},
......@@ -107,13 +100,23 @@
{
"id": "produce"
}
]
],
"hyperparams": {
"use_semantic_types": {
"type": "VALUE",
"data": true
},
"return_result": {
"type": "VALUE",
"data": "replace"
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "49af9397-d9a2-450f-93eb-c3b631ba6646",
"version": "3.0.0",
"version": "3.0.1",
"python_path": "d3m.primitives.clustering.gmm.Fastlvm",
"name": "Gaussian Mixture Models"
},
......@@ -163,6 +166,5 @@
}
]
}
],
"pipeline_rank": "1"
}
]
}
\ No newline at end of file
{
"id": "49af9397-d9a2-450f-93eb-c3b631ba6646",
"version": "3.0.0",
"version": "3.0.1",
"name": "Gaussian Mixture Models",
"description": "This class provides functionality for unsupervised inference on Gaussian mixture model, which is a probabilistic\nmodel that assumes all the data points are generated from a mixture of a finite number of Gaussian distributions\nwith unknown parameters. It can be viewed as a generalization of the K-Means clustering to incorporate\ninformation about the covariance structure of the data. Standard packages, like those in scikit learn run on a\nsingle machine and often only on one thread. Whereas our underlying C++ implementation can be distributed to run\non multiple machines. To enable the distribution through python interface is work in progress. In this class,\nwe implement inference on (Bayesian) Gaussian mixture models using Canopy algorithm. The API is similar to\nsklearn.mixture.GaussianMixture. The class is pickle-able.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.clustering.gmm.Fastlvm",
......@@ -23,7 +23,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/autonlab/fastlvm.git@c77b1413155f4db3b4dea2f99870d155e58322a0#egg=fastlvm"
"package_uri": "git+https://github.com/autonlab/fastlvm.git@f3229d5190a6eb5f0b3d20bb3e45f20f49feea92#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -240,5 +240,5 @@
}
},
"structural_type": "fastlvm.gmm.GMM",
"digest": "56c87c821d1e239b2dbc0b82b9c922dfc38336f02bcdc0de3253eedaed7ab44d"
"digest": "1c18b0887841096424a199a9adf13dc95f5c3c34500114bb16cb2cc58f3aabdd"
}
{
"id": "f3ac5b88-decf-4dd1-bee7-aedb4c1950a6",
"id": "971902a2-71a5-43e8-a594-1f30e91b6ffb",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-04-12T18:33:08.202953Z",
"context": "EVALUATION",
"created": "2019-05-21T18:45:10.617865Z",
"inputs": [
{
"name": "dataset inputs"
......@@ -80,12 +79,6 @@
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
},
"exclude_columns": {
"type": "VALUE",
"data": [
65
]
}
}
},
......@@ -107,13 +100,23 @@
{
"id": "produce"
}
]
],
"hyperparams": {
"use_semantic_types": {
"type": "VALUE",
"data": true
},
"return_result": {
"type": "VALUE",
"data": "replace"
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "66c3bb07-63f7-409e-9f0f-5b07fbf7cd8e",
"version": "3.0.0",
"version": "3.0.1",
"python_path": "d3m.primitives.clustering.k_means.Fastlvm",
"name": "K-means Clustering"
},
......@@ -159,6 +162,5 @@
}
]
}
],
"pipeline_rank": "1"
}
]
}
\ No newline at end of file
{
"id": "66c3bb07-63f7-409e-9f0f-5b07fbf7cd8e",
"version": "3.0.0",
"version": "3.0.1",
"name": "K-means Clustering",
"description": "This class provides functionality for unsupervised clustering, which according to Wikipedia is 'the task of\ngrouping a set of objects in such a way that objects in the same group (called a cluster) are more similar to\neach other than to those in other groups'. It is a main task of exploratory data mining, and a common technique\nfor statistical data analysis. The similarity measure can be, in general, any metric measure: standard Euclidean\ndistance is the most common choice and the one currently implemented. In future, adding other metrics should not\nbe too difficult. Standard packages, like those in scikit learn run on a single machine and often only on one\nthread. Whereas our underlying C++ implementation can be distributed to run on multiple machines. To enable the\ndistribution through python interface is work in progress. In this class, we implement a K-Means clustering using\nLlyod's algorithm and speed-up using Cover Trees. The API is similar to sklearn.cluster.KMeans. The class is\npickle-able.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.clustering.k_means.Fastlvm",
......@@ -23,7 +23,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/autonlab/fastlvm.git@c77b1413155f4db3b4dea2f99870d155e58322a0#egg=fastlvm"
"package_uri": "git+https://github.com/autonlab/fastlvm.git@f3229d5190a6eb5f0b3d20bb3e45f20f49feea92#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -240,5 +240,5 @@
}
},
"structural_type": "fastlvm.kmeans.KMeans",
"digest": "7ece7c8e3ebd8696eb17139485697c52149eef39730dba176a7aee221e721a4a"
"digest": "8a0784246835d7258eb3622f548662c5c61d8221f432d25d01f68dd1437e469f"
}
{
"id": "71eaa234-f8b6-4fce-a849-64a7404b0723",
"id": "51e27d46-fd25-4c47-a6cf-1d3885a7b7df",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-04-30T20:49:47.330195Z",
"context": "EVALUATION",
"created": "2019-05-21T18:39:14.563729Z",
"inputs": [
{
"name": "dataset inputs"
......@@ -87,7 +86,7 @@
"type": "PRIMITIVE",
"primitive": {
"id": "a3d490a4-ef39-4de1-be02-4c43726b3b24",
"version": "3.0.0",
"version": "3.0.1",
"python_path": "d3m.primitives.natural_language_processing.glda.Fastlvm",
"name": "Gaussian Latent Dirichlet Allocation Topic Modelling"
},
......@@ -147,7 +146,10 @@
},
"class_weight": {
"type": "VALUE",
"data": "balanced"
"data": {
"case": "str",
"value": "balanced"
}
}
}
},
......@@ -175,6 +177,5 @@
}
]
}
],
"pipeline_rank": "1"
]
}
\ No newline at end of file
{
"id": "a3d490a4-ef39-4de1-be02-4c43726b3b24",
"version": "3.0.0",
"version": "3.0.1",
"name": "Gaussian Latent Dirichlet Allocation Topic Modelling",
"description": "This class provides functionality for unsupervised inference on Gaussian latent Dirichlet allocation,\nwhich replace LDA's parameterization of 'topics' as categorical distributions over opaque word types with\nmultivariate Gaussian distributions on the embedding space. This encourages the model to group words that are a\npriori known to be semantically related into topics, as continuous space word embeddings learned from large,\nunstructured corpora have been shown to be effective at capturing semantic regularities in language. Using\nvectors learned from a domain-general corpus (e.g. English Wikipedia), qualitatively, Gaussian LDA infers\ndifferent (but still very sensible) topics relative to standard LDA. Quantitatively, the technique outperforms\nexisting models at dealing with OOV words in held-out documents. No standard packages exists. Our underlying C++\nimplementation can be distributed to run on multiple machines. To enable the distribution through python\ninterface is work in progress. In this class, we implement inference on Gaussian latent Dirichlet Allocation\nusing Canopy algorithm. In case of full covariance matrices, it exploits the Cholesky decompositions of\ncovariance matrices of the posterior predictive distributions and performs efficient rank-one updates. The API is\nsimilar to sklearn.decomposition.LatentDirichletAllocation.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.natural_language_processing.glda.Fastlvm",
......@@ -24,7 +24,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/autonlab/fastlvm.git@c77b1413155f4db3b4dea2f99870d155e58322a0#egg=fastlvm"
"package_uri": "git+https://github.com/autonlab/fastlvm.git@f3229d5190a6eb5f0b3d20bb3e45f20f49feea92#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -315,5 +315,5 @@
}
},
"structural_type": "fastlvm.glda.GLDA",
"digest": "844ac8725abdf367b6ee5d2082196b8c3ce2a3e5b8af9183b621f5cad18a1f4b"
"digest": "bca9e8f7fdd8a15f279a4467efa178bc739d20d2159e1083516177a3c290a39a"
}
{
"id": "5db2b2cf-ce99-41ee-8394-bd2d2c2af15c",
"id": "6e9edaab-86b7-4408-b052-7e2fcd90ea0c",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-04-09T17:29:16.155119Z",
"context": "EVALUATION",
"created": "2019-05-21T18:44:01.772596Z",
"inputs": [
{
"name": "dataset inputs"
......@@ -87,7 +86,7 @@
"type": "PRIMITIVE",
"primitive": {
"id": "e582e738-2f7d-4b5d-964f-022d15f19018",
"version": "3.0.0",
"version": "3.0.1",
"python_path": "d3m.primitives.natural_language_processing.hdp.Fastlvm",
"name": "Hierarchical Dirichlet Process Topic Modelling"
},
......@@ -147,7 +146,10 @@
},
"class_weight": {
"type": "VALUE",
"data": "balanced"
"data": {
"case": "str",
"value": "balanced"
}
}
}
},
......@@ -175,6 +177,5 @@
}
]
}
],
"pipeline_rank": "1"
}
]
}
\ No newline at end of file
{
"id": "e582e738-2f7d-4b5d-964f-022d15f19018",
"version": "3.0.0",
"version": "3.0.1",
"name": "Hierarchical Dirichlet Process Topic Modelling",
"description": "This class provides functionality for Hierarchical Dirichlet Process, which is a nonparametric Bayesian model for\ntopic modelling on corpora of documents which seeks to represent the underlying thematic structure of the\ndocument collection. They have emerged as a powerful new technique of finding useful structure in an unstructured\ncollection as it learns distributions over words. The high probability words in each distribution gives us a way\nof understanding the contents of the corpus at a very high level. In HDP, each document of the corpus is assumed\nto have a distribution over K topics, where the discrete topic distributions are drawn from a symmetric dirichlet\ndistribution. As it is a nonparametric model, the number of topics K is inferred automatically. The API is\nsimilar to its parametric equivalent sklearn.decomposition.LatentDirichletAllocation. The class is pickle-able.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.natural_language_processing.hdp.Fastlvm",
......@@ -25,7 +25,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/autonlab/fastlvm.git@c77b1413155f4db3b4dea2f99870d155e58322a0#egg=fastlvm"
"package_uri": "git+https://github.com/autonlab/fastlvm.git@f3229d5190a6eb5f0b3d20bb3e45f20f49feea92#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -280,5 +280,5 @@
}
},
"structural_type": "fastlvm.hdp.HDP",
"digest": "38adf4ed4bbfe1b92b96cd36af5908692459239209d00b530bf184caa7d65d7f"
"digest": "7b62af3e17134c5b310945f09733c00bbada32b0357f510e83927fd38bb5895e"
}
{
"id": "8f368297-e3dc-4337-a8fb-64c0b5749dfb",
"id": "f663b5ff-80cc-4093-8222-257e3a0a11e8",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-04-03T21:09:56.753059Z",
"context": "EVALUATION",
"created": "2019-05-21T18:49:55.668692Z",
"inputs": [
{
"name": "dataset inputs"
......@@ -87,7 +86,7 @@
"type": "PRIMITIVE",
"primitive": {
"id": "f410b951-1cb6-481c-8d95-2d97b31d411d",
"version": "3.0.0",
"version": "3.0.1",
"python_path": "d3m.primitives.natural_language_processing.lda.Fastlvm",
"name": "Latent Dirichlet Allocation Topic Modelling"
},
......@@ -147,7 +146,10 @@
},
"class_weight": {
"type": "VALUE",
"data": "balanced"
"data": {
"case": "str",
"value": "balanced"
}
}
}
},
......@@ -175,6 +177,5 @@
}
]
}
],
"pipeline_rank": "1"
}
]
}
\ No newline at end of file
{
"id": "f410b951-1cb6-481c-8d95-2d97b31d411d",
"version": "3.0.0",
"version": "3.0.1",
"name": "Latent Dirichlet Allocation Topic Modelling",
"description": "This class provides functionality for unsupervised inference on latent Dirichlet allocation, which is a\nprobabilistic topic model of corpora of documents which seeks to represent the underlying thematic structure of\nthe document collection. They have emerged as a powerful new technique of finding useful structure in an\nunstructured collection as it learns distributions over words. The high probability words in each distribution\ngives us a way of understanding the contents of the corpus at a very high level. In LDA, each document of the\ncorpus is assumed to have a distribution over K topics, where the discrete topic distributions are drawn from a\nsymmetric dirichlet distribution. Standard packages, like those in scikit learn are inefficient in addition to\nbeing limited to a single machine. Whereas our underlying C++ implementation can be distributed to run on\nmultiple machines. To enable the distribution through python interface is work in progress. The API is similar to\nsklearn.decomposition.LatentDirichletAllocation.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.natural_language_processing.lda.Fastlvm",
......@@ -24,7 +24,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/autonlab/fastlvm.git@c77b1413155f4db3b4dea2f99870d155e58322a0#egg=fastlvm"
"package_uri": "git+https://github.com/autonlab/fastlvm.git@f3229d5190a6eb5f0b3d20bb3e45f20f49feea92#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -279,5 +279,5 @@
}
},
"structural_type": "fastlvm.lda.LDA",
"digest": "93fb5a3e8c6200e4fb1a201d3625775a270e597159340f4173d34b88e1c14a5d"
"digest": "293b68bd149bd8938e10d757e9e841cae1e0a51c7d629bad0800ee8b89fd36f8"