1.Generate_json
In [ ]:
Copied!
# Install the Faker dependency if it is missing (uncomment in a notebook):
# !pip install Faker
In [ ]:
Copied!
import random

import pandas as pd
from faker import Faker
from faker.providers import DynamicProvider

# Custom provider exposing the fixed list of job titles below under the
# provider name "job".
# NOTE(review): the name overlaps Faker's built-in job provider; the code
# below relies on fake.job() drawing from this list -- confirm the override.
job_provider = DynamicProvider(
    provider_name="job",
    elements=[
        "student", "doctor", "nurse", "teacher", "software engineer",
        "data science", "data engineer", "tester", "data analyst",
        "lawyer", "mechanic", "accountant", "sales", "chef", "police",
        "architect", "graphic designer", "plumber", "marketing", "dentist",
        "electrician",
    ],
)

# Single shared Faker instance used by the data-generation cell.
fake = Faker()
fake.add_provider(job_provider)
In [ ]:
Copied!
# Column names of the generated table, in the same order as the fields of
# each generated row tuple.
column = ["uid", "name", "age", "job"]
In [ ]:
Copied!
# Build 2000 synthetic rows as (uid, name, age, job) tuples.
#   uid  -- "id" followed by the row number zero-padded to 5 digits
#   name -- fake.name()
#   age  -- random integer in [10, 70], both ends inclusive
#   job  -- fake.job()
# The fields are evaluated left to right for each row, exactly as in the
# original append loop, so the sequence of random draws is unchanged.
data = [
    (f"id{i:05d}", fake.name(), random.randint(10, 70), fake.job())
    for i in range(2000)
]
In [ ]:
Copied!
# Assemble the generated row tuples into a DataFrame, labelling the fields
# with the names in `column`.
df = pd.DataFrame(data, columns=column)
In [ ]:
Copied!
from pathlib import Path

# The writers below do not create missing directories, so make sure the
# output folder exists before writing.
Path("./dataset").mkdir(parents=True, exist_ok=True)

# `index=False` is not accepted together with the default JSON orient
# ("columns"): older pandas raises ValueError, newer releases ignore it.
# Leaving it out gives the same column-oriented output on every version.
# Use orient="records" instead if an index-free layout is wanted.
df.to_json('./dataset/raw.json')

# CSV export without the positional index column.
df.to_csv('./dataset/raw.csv', index=False)
In [ ]:
Copied!
# Display the generated DataFrame as the cell output.
df
In [ ]:
Copied!