Generate Fake Email Data

pull/8/head
Benedikt Terhechte 3 years ago
parent 5f4c60bb66
commit 4c86d033ff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,95 @@
# Parse all the JSON files in this folder and generate a Rust source file
# containing the generated data.
import glob
import json
import random
import sys
# Every record loaded from the JSON files in the current directory.
entries = []
# Destination of the generated Rust source, relative to the working directory.
output_rust_file = "../src/generated.rs"

# Coalesce data: the contents of every JSON file are merged into `entries`.
# NOTE(review): each file presumably holds a list of entry dicts - confirm.
for json_file in glob.glob('*.json'):
    # A context manager closes each handle right after parsing instead of
    # leaving it to the garbage collector; JSON text is read as UTF-8 so the
    # result does not depend on the platform's locale encoding.
    with open(json_file, "r", encoding="utf-8") as json_handle:
        entries.extend(json.load(json_handle))

# For each entry, generate a struct.
# Recipient written into every generated entry.
to_address = "john@doe.com"
to_name = ""

# Rust struct literal for a single entry.
struct_template = """Entry { %(fields)s }"""

# Skeleton of the generated Rust file: a constant array of all entries.
output_template = """
use super::database::Entry;
pub const ENTRIES: [Entry; %(amount)s] = [
%(content)s
];
"""

# To generate some more data, we keep some (email, name) pairs so that a
# random number of additional emails can be generated for those senders
# afterwards.
additional_emails = []
# Rendered `Entry { ... }` literals, in output order.
generated_entries = []
def fields_from_entry(entry, recipient_address=None, recipient_name=None):
    """Build the field dict of one generated ``Entry`` from a JSON record.

    Args:
        entry: Mapping with the keys ``name``, ``email``, ``date`` (a
            comma-separated ``year,month,day`` string), ``time``, ``reply``,
            ``send`` and ``subject``.
        recipient_address: Value for ``to_address``. Defaults to the
            module-level ``to_address``.
        recipient_name: Value for ``to_name``. Defaults to the module-level
            ``to_name``.

    Returns:
        A dict whose insertion order is the order in which the fields are
        later rendered.

    Raises:
        KeyError: If ``entry`` lacks one of the required keys.
        ValueError: If the email address contains no ``@`` or a date or time
            component is not an integer.
    """
    # Split on the LAST "@": a quoted local part may itself contain "@",
    # while the domain never does.
    local_part, separator, domain = entry["email"].rpartition("@")
    if not separator:
        raise ValueError("email address without '@': %r" % (entry["email"],))

    # Only the first three date components are used, as year, month, day.
    year, month, day = (int(part) for part in entry["date"].split(",")[:3])

    return {
        "sender_name": entry["name"],
        "sender_domain": domain,
        "sender_local_part": local_part,
        "year": year,
        "month": month,
        "day": day,
        "timestamp": int(entry["time"]),
        # The JSON data flags replies / sent mails with the number 1.
        "is_reply": entry["reply"] == 1,
        "is_send": entry["send"] == 1,
        "subject": entry["subject"],
        "to_address": to_address if recipient_address is None else recipient_address,
        "to_name": to_name if recipient_name is None else recipient_name,
    }
# Characters that must be escaped inside a double-quoted Rust string literal.
# A bare carriage return is rejected by the Rust compiler, and an unescaped
# quote or backslash would end or corrupt the literal.
_RUST_STRING_ESCAPES = str.maketrans({
    "\\": "\\\\",
    '"': '\\"',
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
})


def _rust_string_literal(text):
    """Return ``text`` as a double-quoted Rust string literal."""
    return '"%s"' % text.translate(_RUST_STRING_ESCAPES)


def fields_to_string(k):
    """Render a field dict as the body of a Rust struct literal.

    Args:
        k: Mapping of field name to value. Supported value types are
            ``bool`` (rendered as ``true`` / ``false``), ``int`` (rendered
            as a decimal number) and ``str`` (rendered as an escaped,
            double-quoted string).

    Returns:
        The ``name: value`` pairs joined with ``", "`` in the mapping's
        iteration order; an empty string for an empty mapping.

    Raises:
        SystemExit: With a non-zero status and a message on stderr when a
            value has an unsupported type, so a failed generation is not
            reported as success.
    """
    rendered = []
    for key, value in k.items():
        # bool must be tested before int because bool is a subclass of int.
        if isinstance(value, bool):
            literal = "true" if value else "false"
        elif isinstance(value, int):
            literal = str(value)
        elif isinstance(value, str):
            literal = _rust_string_literal(value)
        else:
            sys.exit(
                "unsupported value %r of %s for field %r"
                % (value, type(value), key)
            )
        rendered.append("%s: %s" % (key, literal))
    return ", ".join(rendered)
# First run over the emails: render one struct literal per loaded entry.
for entry in entries:
    k = fields_from_entry(entry)
    # Generate additional mails: with a probability of about 30%, remember
    # this sender 5 to 50 times so that the second run can build a cluster
    # of mails from the same address.
    if random.uniform(0.0, 1.0) > 0.7:
        repeat_count = random.randint(5, 50)
        additional_emails.extend([(entry["email"], entry["name"])] * repeat_count)
    joined_fields = fields_to_string(k)
    generated_entries.append(struct_template % {"fields": joined_fields})

# Second run over the emails to generate additional entries with the same
# email address so we have some clusters. zip() stops at the shorter
# sequence, so at most len(entries) additional structs are produced.
for entry, (email, name) in zip(entries, additional_emails):
    # Work on a copy so the loaded entries keep their original sender.
    clustered_entry = dict(entry, email=email, name=name)
    joined_fields = fields_to_string(fields_from_entry(clustered_entry))
    generated_entries.append(struct_template % {"fields": joined_fields})

entries_string = ",\n".join(generated_entries)
# Rust source files must be UTF-8, so the encoding is stated explicitly
# rather than taken from the locale; the context manager guarantees the file
# is flushed and closed even if writing fails.
with open(output_rust_file, "w", encoding="utf-8") as writer:
    writer.write(
        output_template
        % {"content": entries_string, "amount": len(generated_entries)}
    )

File diff suppressed because it is too large Load Diff

@ -6,6 +6,9 @@ use ps_gui::{eframe, PostsackApp};
mod database;
#[cfg(target_arch = "wasm32")]
mod generated;
#[cfg(target_arch = "wasm32")]
use wasm_bindgen::prelude::*;

Loading…
Cancel
Save