Commit fdc0460a authored by Ilya Prokhorov

Gopnik dataset added, Stalin dataset removed, unique phrases generator added

parent 4b633f14
import re
import sys
# Captures the inner HTML of each wall-post text container. Written as a raw
# string so the pattern carries no invalid escape sequences; "<" needs no
# escaping in a regular expression.
POST_TEXT_PATTERN = re.compile(r'wall_post_text zoom_text">(.*?)</div')

# The extracted posts are always written to this file in the working directory.
OUTPUT_FILENAME = "output.txt"


def extract_posts(html):
    """Return the wall-post texts found in *html*, in document order.

    Every ``<br>`` inside a post is replaced with a single space. Posts whose
    text starts with ``<a href`` are skipped.
    """
    posts = []
    for raw_text in POST_TEXT_PATTERN.findall(html):
        text = raw_text.replace("<br>", " ")
        if text.startswith("<a href"):
            continue
        posts.append(text)
    return posts


def main(argv):
    """Extract posts from the file named by ``argv[1]``.

    Each post is printed to stdout and all posts are written, newline
    separated, to ``output.txt``. Returns the process exit status: 0 on
    success, 2 when the input filename argument is missing.
    """
    if len(argv) < 2:
        print("Usage: %s <saved_page.html>" % argv[0], file=sys.stderr)
        return 2

    # Read with the platform default encoding; the context manager closes the
    # handle even if reading fails.
    with open(argv[1], "r") as source:
        html = source.read()

    posts = extract_posts(html)
    for post in posts:
        print(post)

    with open(OUTPUT_FILENAME, "w") as output:
        output.write("\n".join(posts))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
......@@ -12,24 +12,19 @@ print("Textgenrnn quotes text generator by demensdeum 2018 (demensdeum@gmail.com
command_mode = False
if len(sys.argv) == 2:
if len(sys.argv) > 1:
print("Silent mode enabled")
command_mode = True
command_mode_state = sys.argv[1]
command_mode_state_array = command_mode_state.split(",")
if len(command_mode_state_array) != 2:
print("Incorrect silent mode, must be - mode,dataset_filename - for example: python main.py generate,10")
print("Modes: train_reset/train_resume/generate")
exit(4)
if command_mode == False:
state = input("train_reset/train_resume/generate? ")
state = input("train_reset/train_resume/generate/generate_unique? ")
else:
state = command_mode_state_array[0]
if state != "train_reset" and state != "train_resume" and state != "generate":
print("Write train_reset or train_resume or generate... Exit")
if state != "train_reset" and state != "train_resume" and state != "generate" and state != "generate_unique":
print("Write train_reset or train_resume or generate or generate_unique... Exit")
exit(1)
if state == "train_reset":
......@@ -47,7 +42,7 @@ if command_mode == False:
dataset_file = input("dataset filename? (%s) " % default_dataset_filename)
if len(dataset_file) < 1:
dataset_file = default_dataset_filename
elif state == "generate":
elif state == "generate" or state == "generate_unique":
tries = input("tries? (1) ")
if len(tries) > 0:
try:
......@@ -65,14 +60,15 @@ if state == "train_reset" or state == "train_resume":
while True:
print("Endless train mode, every 4 epochs will be saved. CTRL+C to exit")
try:
textgen.train_from_file(dataset_file, num_epochs=4, new_model = reset_model)
textgen.train_from_file(dataset_file, num_epochs = 4, batch_size = 8096, new_model = reset_model)
except KeyboardInterrupt:
print("\nKilled")
exit(0)
except:
print("Crashed... Recovery")
elif state == "generate":
elif state == "generate" or state == "generate_unique":
if path.exists(weights_filename) == False:
print("There is no weights to generate, train first... Exit")
exit(3)
......@@ -83,7 +79,39 @@ elif state == "generate":
tries = int(tries)
except:
print("Incorrect generate phrase tries number - must be integer number, for example - generate,10")
if state == "generate_unique":
    # generate_unique needs the dataset text so that its lines can be
    # collected into dataset_file_text_set for later membership checks.
    if command_mode:
        # Silent mode: argv[1] is "mode,tries" and the dataset path is the
        # separate argument argv[2].
        if len(sys.argv) != 3:
            print("Incorrect generate_unique format, must be - generate_unique,10 dataset.txt")
            exit(5)
        dataset_file = sys.argv[2]
    else:
        dataset_file = input("dataset filename? (%s) " % default_dataset_filename)
        if len(dataset_file) < 1:
            dataset_file = default_dataset_filename

    # Close the handle as soon as the text has been read.
    with open(dataset_file, "r") as dataset_file_descriptor:
        dataset_file_text = dataset_file_descriptor.read()
    dataset_file_text_set = set(dataset_file_text.split("\n"))

    # str.split always yields at least one element, so a length check on the
    # set cannot detect an empty dataset; require at least one non-blank line.
    if not any(line.strip() for line in dataset_file_text_set):
        print("Incorrent dataset... Exit")
        exit(6)
for tryStep in range(0, tries):
textgen = textgenrnn(weights_path = weights_filename, vocab_path = vocab_filename, config_path = config_filename)
textgen.generate(temperature = uniform(0.8, 1.5))
\ No newline at end of file
if state == "generate":
generated = textgen.generate(temperature = uniform(0.8, 1.5), return_as_list=True)
print(generated[0])
elif state == "generate_unique":
generated = list(dataset_file_text_set)[0]
while generated in dataset_file_text_set:
generated = textgen.generate(temperature = uniform(0.8, 1.5), return_as_list=True)
generated = generated[0]
print(generated)
exit(0)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment