Commit e21d80e7 authored by Erik Malone

Initial commit.

parents
from bs4 import BeautifulSoup
from pathlib import Path
from tqdm import tqdm
import markovify, json
# Per-author text that gen_sentences() appends to every sentence generated
# for that author; authors without an entry get no suffix.
addends = {
    'davedan': 'suck my salty balls cracker',
}
def parse_file(fname):
    """Read *fname* as UTF-8 text and return it parsed by BeautifulSoup.

    The markup is parsed with the ``'html.parser'`` backend.
    """
    with open(fname, encoding='utf-8') as handle:
        markup = handle.read()
    return BeautifulSoup(markup, 'html.parser')
def get_posts(fname):
    """Return every ``<div class="list_posts">`` element found in *fname*."""
    soup = parse_file(fname)
    wanted = {'class': 'list_posts'}
    return soup.find_all('div', attrs=wanted)
def strip_quotes(posts):
    """Remove quoted material from every post, in place.

    For each post, all ``div.quoteheader``, ``blockquote.bbc_standard_quote``
    and ``div.quotefooter`` descendants are looked up by calling the post
    (``post(tag, attrs=...)``) and then destroyed with ``decompose()``.

    Args:
        posts: Iterable of callable, tag-like objects (e.g. the result of
            ``get_posts``).

    Returns:
        The same ``posts`` object that was passed in, now mutated.
    """
    # (tag name, class) of each element kind to delete, in removal order.
    quote_parts = (
        ('div', 'quoteheader'),
        ('blockquote', 'bbc_standard_quote'),
        ('div', 'quotefooter'),
    )
    for post in posts:
        # Run every search before destroying anything so that removing one
        # element cannot change what the later searches find.
        matches = [
            post(tag, attrs={'class': css_class})
            for tag, css_class in quote_parts
        ]
        # Plain loops: decompose() is called purely for its side effect.
        for group in matches:
            for element in group:
                element.decompose()
    return posts
def mine_file(fname):
    """Return the whitespace-stripped text of each post in *fname*.

    Announces the file on stdout, extracts its posts, removes quoted
    material from them and returns one string per post.
    """
    print(f'Mining file {fname}.')
    cleaned = strip_quotes(get_posts(fname))
    return [entry.get_text().strip() for entry in cleaned]
def mine_folder(folder, root='E:/Chaositech'):
    """Build one text corpus from every ``posts*.txt`` file in a folder.

    Args:
        folder: Name of the sub-folder (relative to *root*) to scan.
        root: Base directory holding the per-author folders. Defaults to
            the location the script was originally written for.

    Returns:
        The posts of each matching file joined by newlines, with the files
        themselves joined by newlines; ``''`` when no file matches.
    """
    source_dir = Path(f'{root}/{folder}')
    # Path.glob() yields entries in arbitrary order; sorting makes the
    # resulting corpus reproducible from run to run.
    files = sorted(source_dir.glob('posts*.txt'))
    return '\n'.join('\n'.join(mine_file(name)) for name in files)
def make_model(corpus):
    """Train a markovify text model on *corpus*.

    Returns:
        A ``(model, js)`` tuple where ``js`` is the model's JSON export
        re-serialised with two-space indentation.
    """
    model = markovify.Text(corpus)
    exported = json.loads(model.to_json())
    pretty = json.dumps(exported, indent=2)
    return model, pretty
def save_model(folder, js, root='E:/Chaositech'):
    """Write the JSON text *js* to ``{root}/{folder}/model.json``.

    Args:
        folder: Name of the sub-folder (relative to *root*) to write into.
        js: Text to store in the file.
        root: Base directory holding the per-author folders. Defaults to
            the location the script was originally written for.

    Raises:
        OSError: If the file cannot be opened for writing (for example when
            the target folder does not exist).
    """
    # Explicit UTF-8 keeps the file independent of the platform's default
    # encoding, matching how parse_file() reads its input.
    with open(f'{root}/{folder}/model.json', 'w', encoding='utf-8') as f:
        print(f'Saving file to {f.name}')
        f.write(js)
def gen_sentences(model,
                  n=5,
                  do_quote=True,
                  author=''):
    """Yield up to *n* generated sentences, formatted for posting.

    Args:
        model: Object with a ``make_sentence()`` method (a markovify model).
        n: Number of generation attempts.
        do_quote: When true, wrap each sentence in a
            ``[quote author=...]`` / ``[/quote]`` block.
        author: Name used in the quote tag and to look up an optional
            suffix in ``addends``.

    Yields:
        One formatted string per successfully generated sentence, each
        terminated by a blank line.
    """
    # Resolve the per-author suffix once; it does not change between
    # iterations.
    suffix = f' {addends[author]}' if author in addends else ''
    for _ in range(n):
        sentence = model.make_sentence()
        if sentence is None:
            # markovify gives up and returns None when it cannot build a
            # sentence; skip it rather than emit the literal text "None".
            continue
        body = f'{sentence}{suffix}'
        if do_quote:
            yield f'[quote author={author}]\n{body}\n[/quote]\n\n'
        else:
            yield f'{body}\n\n'
def load_model(folder, root='E:/Chaositech'):
    """Load the markovify model stored in ``{root}/{folder}/model.json``.

    Args:
        folder: Name of the sub-folder (relative to *root*) to read from.
        root: Base directory holding the per-author folders. Defaults to
            the location the script was originally written for.

    Returns:
        The model rebuilt by ``markovify.Text.from_json``.

    Raises:
        FileNotFoundError: If the folder has no ``model.json``.
    """
    # Explicit UTF-8 so the result does not depend on the platform's default
    # encoding; plain-ASCII JSON (what json.dumps emits by default) decodes
    # identically either way.
    with open(f'{root}/{folder}/model.json', encoding='utf-8') as f:
        # f.read() already returns str in text mode, so no conversion needed.
        return markovify.Text.from_json(f.read())
def print_sentences(model,
                    n=5,
                    do_quote=True,
                    author=''):
    """Print each string produced by ``gen_sentences`` for these arguments.

    All parameters are forwarded unchanged to ``gen_sentences``; every
    yielded string is written to stdout with ``print``.
    """
    generated = gen_sentences(model, n, do_quote, author)
    for line in generated:
        print(line)
def gen_sentences_for_all(n=5,
                          do_quote=True):
    """Generate sentences for every author folder below the current directory.

    Each directory found by recursively globbing the working directory is
    treated as an author: its name is passed to ``load_model`` and the
    resulting model is fed to ``gen_sentences``.

    Args:
        n: Number of generation attempts per author.
        do_quote: Forwarded to ``gen_sentences``.

    Returns:
        A list with the formatted sentences of all authors, in the order the
        folders were visited.
    """
    here = Path('.')
    sentences = []
    for folder in Path('./').glob('./**/'):
        # Skip the working directory itself. Filtering (rather than
        # list.remove) cannot raise ValueError if the glob omits it.
        if folder == here:
            continue
        author = folder.name
        try:
            model = load_model(author)
        except FileNotFoundError:
            # Not every directory holds a saved model; one missing file
            # should not abort generation for the remaining authors.
            print(f'No model found for {author}; skipping.')
            continue
        sentences.extend(gen_sentences(model, n, do_quote, author))
    return sentences
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment