!pip install faker
Requirement already satisfied: faker in /usr/local/lib/python3.12/dist-packages (37.12.0)
Requirement already satisfied: tzdata in /usr/local/lib/python3.12/dist-packages (from faker) (2025.2)
Requirement already satisfied: faker in /usr/local/lib/python3.12/dist-packages (37.12.0)
Requirement already satisfied: tzdata in /usr/local/lib/python3.12/dist-packages (from faker) (2025.2)
def create_hospital_data(seed=123, n=2):
    """Generate a table of dummy hospitals.

    Uses the file-level ``fake`` Faker instance, which is re-seeded here so
    that the same ``seed`` always produces the same rows.

    Args:
        seed: Value passed to ``fake.seed_instance``.
        n: Number of hospitals to generate. Defaults to 2, the count that
            used to be hard-coded.

    Returns:
        pandas.DataFrame with the columns ``hospital_name`` and
        ``hospital_address``, one row per hospital.
    """
    fake.seed_instance(seed)

    hospital_names = []
    for _ in range(n):
        # Each name consumes two draws and only the second is used. The
        # throwaway draw is kept on purpose: dropping it would change the
        # hospitals generated for any given seed.
        fake.last_name()
        hospital_names.append('RS ' + fake.last_name())

    hospital_addresses = []
    for _ in range(n):
        # Same as above: one discarded draw per address keeps seeded output
        # stable.
        fake.address()
        # Replace newlines in the generated address so it fits on one line.
        hospital_addresses.append(fake.address().replace("\n", ", "))

    return pd.DataFrame(data={
        'hospital_name': hospital_names,
        'hospital_address': hospital_addresses,
    })
|---|---|---|
| 0 | RS Santoso | Gg. Cikutra Timur No. 571, Bekasi, Jawa Tengah... |
| 1 | RS Wasita | Jalan H.J Maemunah No. 174, Magelang, NB 17695 |
def create_specializations(seed=123):
    """Generate the table of medical specializations.

    The specialization names are a fixed list; each one gets a short random
    description from the file-level ``fake`` Faker instance, which is
    re-seeded here.

    Args:
        seed: Value passed to ``fake.seed_instance``.

    Returns:
        pandas.DataFrame with the columns ``specialization_name`` and
        ``specialization_desc``, one row per specialization.
    """
    fake.seed_instance(seed)

    specialization_name = [
        'General Practitioners',
        'Pediatricians',
        'Cardiologists',
        'Dermatologists',
        'Orthopedic',
    ]
    # Ask for exactly one description per name so the two columns cannot
    # drift apart when the list above is edited.
    specialization_desc = fake.texts(
        nb_texts=len(specialization_name), max_nb_chars=50
    )

    return pd.DataFrame(data={
        'specialization_name': specialization_name,
        # The key used to be 'specialization _desc' (stray space), which
        # ended up as a malformed column header.
        'specialization_desc': specialization_desc,
    })
|---|---|---|
| 0 | General Practitioners | Eaque quisquam eaque. Fugit natus exercitationem. |
| 1 | Pediatricians | Cum temporibus quo soluta fugiat. |
| 2 | Cardiologists | Pariatur commodi tenetur eos. |
| 3 | Dermatologists | Reprehenderit odit natus vero accusamus. |
| 4 | Orthopedic | Mollitia sunt quam harum quod accusamus. |
def create_patients(seed=123, n=100):
    """Generate a table of dummy patients.

    Both the file-level ``fake`` Faker instance and the global ``random``
    module are re-seeded with ``seed``.

    Args:
        seed: Seed for ``fake.seed_instance`` and ``random.seed``.
        n: Number of patients to generate.

    Returns:
        pandas.DataFrame with the columns ``patient_name``,
        ``patient_gender``, ``patient_birthdate`` and ``patient_contact``.
    """
    fake.seed_instance(seed)
    random.seed(seed)

    # Every column is produced in its own pass (names, genders, birthdates,
    # contacts) so the order of random draws is fixed.
    full_names = []
    for _ in range(n):
        given = fake.first_name()
        family = fake.last_name()
        full_names.append(given + " " + family)

    gender_options = ['Female', 'Male']
    sexes = []
    for _ in range(n):
        sexes.append(random.choice(gender_options))

    dobs = []
    for _ in range(n):
        dobs.append(fake.date_of_birth(minimum_age=15, maximum_age=90))

    # Phone numbers: the prefix '08' followed by ten generated digits.
    phones = []
    for _ in range(n):
        digits = fake.numerify(text="##########")
        phones.append('08' + digits)

    table = pd.DataFrame(data={
        'patient_name': full_names,
        'patient_gender': sexes,
        'patient_birthdate': dobs,
        'patient_contact': phones,
    })
    return table
|---|---|---|---|---|
| 0 | Bala Santoso | Female | 1977-02-26 | 089490563144 |
| 1 | Cayadi Wasita | Male | 1960-12-19 | 082656090723 |
| 2 | Kasiran Kurniawan | Female | 1942-10-26 | 080482834773 |
| 3 | Bahuwarna Wibowo | Male | 1990-01-21 | 084811798843 |
| 4 | Gasti Purwanti | Male | 1951-12-13 | 088166710966 |
| ... | ... | ... | ... | ... |
| 95 | Uli Hartati | Female | 1962-12-16 | 081993230755 |
| 96 | Lulut Aryani | Male | 2006-06-05 | 083668184629 |
| 97 | Asman Manullang | Female | 1980-04-12 | 086253589902 |
| 98 | Endah Simbolon | Male | 1937-04-19 | 086571070906 |
| 99 | Dina Mulyani | Female | 1973-03-02 | 081212534333 |
100 rows × 4 columns
def create_doctors(seed=202, n=25, hospitals=None, specializations=None):
    """Generate a table of dummy doctors linked to hospitals and specializations.

    Both the file-level ``fake`` Faker instance and the global ``random``
    module are re-seeded with ``seed``.

    Args:
        seed: Seed for ``fake.seed_instance`` and ``random.seed``.
        n: Number of doctors to generate.
        hospitals: Table of hospitals; only its length is used. Required
            despite the ``None`` default.
        specializations: Table of specializations; only its length is used.
            Required despite the ``None`` default.

    Returns:
        pandas.DataFrame with the columns ``hospital_id``,
        ``specialization_id`` and ``doctor_name``. Both ids are integers
        between 1 and the length of the corresponding table.

    Raises:
        TypeError: If ``hospitals`` or ``specializations`` is ``None``.
        ValueError: If ``hospitals`` or ``specializations`` has no rows.
    """
    # Fail fast with a readable message instead of the opaque errors that
    # len(None) / randint(1, 0) would raise further down. The exception types
    # match what those calls raise.
    for label, table in (('hospitals', hospitals),
                         ('specializations', specializations)):
        if table is None:
            raise TypeError(f"create_doctors() requires the '{label}' table")
        # len() rather than truthiness: the truth value of a DataFrame is
        # ambiguous.
        if len(table) == 0:
            raise ValueError(f"'{label}' must contain at least one row")

    fake.seed_instance(seed)
    random.seed(seed)

    names = [fake.first_name() + " " + fake.last_name() for _ in range(n)]
    # Draw all hospital ids first, then all specialization ids, so the
    # sequence of random draws stays fixed for a given seed.
    hospital_id = [random.randint(1, len(hospitals)) for _ in range(n)]
    specialization_id = [
        random.randint(1, len(specializations)) for _ in range(n)
    ]

    return pd.DataFrame({
        'hospital_id': hospital_id,
        'specialization_id': specialization_id,
        'doctor_name': names,
    })
|---|---|---|---|
| 0 | 2 | 4 | Prasetya Yuliarti |
| 1 | 2 | 4 | Wakiman Wasita |
| 2 | 2 | 4 | Harto Napitupulu |
| 3 | 1 | 3 | Vicky Widodo |
| 4 | 1 | 1 | Balidin Aryani |
| 5 | 2 | 2 | Timbul Hartati |
| 6 | 1 | 1 | Bagus Nuraini |
| 7 | 2 | 1 | Martana Pangestu |
| 8 | 2 | 4 | Jaswadi Lailasari |
| 9 | 2 | 3 | Rini Suryatmi |
| 10 | 1 | 1 | Syahrini Nurdiyanti |
| 11 | 2 | 2 | Harjo Wacana |
| 12 | 1 | 2 | Martani Kusmawati |
| 13 | 1 | 2 | Edi Anggraini |
| 14 | 2 | 1 | Wadi Wahyuni |
| 15 | 1 | 3 | Bagya Nurdiyanti |
| 16 | 2 | 2 | Jamal Rajasa |
| 17 | 2 | 2 | Tugiman Halimah |
| 18 | 2 | 2 | Azalea Saputra |
| 19 | 1 | 4 | Bakijan Nugroho |
| 20 | 2 | 2 | Ratih Januar |
| 21 | 2 | 3 | Bahuwarna Wibowo |
| 22 | 1 | 3 | Karma Anggriawan |
| 23 | 1 | 2 | Garang Prabowo |
| 24 | 2 | 3 | Hadi Maheswara |
def create_doctor_schedule(doctors, seed=123):
    """Build a weekly practice schedule for every doctor.

    Each doctor gets three distinct days out of Monday-Saturday, and each of
    those days gets one of four fixed (start, end) shifts chosen at random.
    The global ``random`` module is re-seeded with ``seed``.

    Args:
        doctors: DataFrame of doctors. Only its index is used; ``doctor_id``
            is the index label plus one.
        seed: Seed for ``random.seed``.

    Returns:
        pandas.DataFrame with the columns ``doctor_id``, ``day_of_week``,
        ``start_time`` and ``end_time``, three rows per doctor.
    """
    # NOTE(review): `time` is not imported in the visible part of the file;
    # presumably `datetime.time` - confirm.
    random.seed(seed)

    weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
    shifts = [
        (time(8, 0), time(12, 0)),
        (time(13, 0), time(17, 0)),
        (time(9, 0), time(13, 0)),
        (time(10, 0), time(14, 0)),
    ]

    rows = []
    for row_label in doctors.index:
        # Pick 3 distinct days at random for this doctor.
        picked_days = random.sample(weekdays, 3)
        for weekday in picked_days:
            shift = random.choice(shifts)
            rows.append(dict(
                doctor_id=row_label + 1,
                day_of_week=weekday,
                start_time=shift[0],
                end_time=shift[1],
            ))

    return pd.DataFrame(rows)
|---|---|---|---|---|
| 0 | 1 | Monday | 10:00:00 | 14:00:00 |
| 1 | 1 | Wednesday | 09:00:00 | 13:00:00 |
| 2 | 1 | Saturday | 08:00:00 | 12:00:00 |
| 3 | 2 | Monday | 09:00:00 | 13:00:00 |
| 4 | 2 | Thursday | 08:00:00 | 12:00:00 |
| ... | ... | ... | ... | ... |
| 70 | 24 | Monday | 10:00:00 | 14:00:00 |
| 71 | 24 | Tuesday | 08:00:00 | 12:00:00 |
| 72 | 25 | Saturday | 08:00:00 | 12:00:00 |
| 73 | 25 | Tuesday | 09:00:00 | 13:00:00 |
| 74 | 25 | Thursday | 10:00:00 | 14:00:00 |
75 rows × 4 columns
def create_appointment_slots(doctor_schedule, slot_duration_minutes=30):
    """Split every schedule row into consecutive fixed-length appointment slots.

    Only slots that fit entirely between a row's ``start_time`` and
    ``end_time`` are produced; a partial slot at the end is dropped.

    Args:
        doctor_schedule: DataFrame with ``start_time`` and ``end_time``
            columns holding ``datetime.time`` values. ``schedule_id`` is the
            row's index label plus one.
        slot_duration_minutes: Length of each slot in minutes; must be
            positive.

    Returns:
        pandas.DataFrame with the columns ``schedule_id``,
        ``slot_time_start`` and ``slot_time_end``. The columns are present
        even when no slot is produced.

    Raises:
        ValueError: If ``slot_duration_minutes`` is not positive (the loop
            could never advance).
    """
    if slot_duration_minutes <= 0:
        raise ValueError("slot_duration_minutes must be positive")

    step = timedelta(minutes=slot_duration_minutes)
    # Arbitrary fixed day used only to do arithmetic on time-of-day values;
    # it keeps the result independent of the wall clock.
    anchor = datetime(2000, 1, 1)

    slots = []
    for index, sched in doctor_schedule.iterrows():
        current = datetime.combine(anchor, sched['start_time'])
        shift_end = datetime.combine(anchor, sched['end_time'])
        # Compare full datetimes, not time-of-day: a slot that runs past
        # midnight wraps its time() back to 00:xx, which used to look
        # "before" the end and kept the loop running forever.
        while current + step <= shift_end:
            slot_close = current + step
            slots.append({
                'schedule_id': index + 1,
                'slot_time_start': current.time(),
                'slot_time_end': slot_close.time(),
            })
            current = slot_close

    return pd.DataFrame(
        slots, columns=['schedule_id', 'slot_time_start', 'slot_time_end']
    )
|---|---|---|---|
| 0 | 1 | 10:00:00 | 10:30:00 |
| 1 | 1 | 10:30:00 | 11:00:00 |
| 2 | 1 | 11:00:00 | 11:30:00 |
| 3 | 1 | 11:30:00 | 12:00:00 |
| 4 | 1 | 12:00:00 | 12:30:00 |
| ... | ... | ... | ... |
| 595 | 75 | 11:30:00 | 12:00:00 |
| 596 | 75 | 12:00:00 | 12:30:00 |
| 597 | 75 | 12:30:00 | 13:00:00 |
| 598 | 75 | 13:00:00 | 13:30:00 |
| 599 | 75 | 13:30:00 | 14:00:00 |
600 rows × 3 columns
from datetime import datetime, timedelta
import random
import pandas as pd
def create_appointments(patients, appointment_slots, n=50, seed=123,
                        past_ratio=0.7, past_days=60, future_days=30,
                        reference_date=None):
    """Generate dummy rows for the appointments table.

    Each appointment pairs a random patient with a random slot on a date in
    the past or the future relative to a reference day. Past appointments are
    'Completed' or 'Cancelled'; future ones are 'Scheduled' or 'Cancelled'.
    Rows that repeat an existing (slot_id, appointment_date) pair are dropped,
    so the result can contain fewer than ``n`` rows.

    Args:
        patients (DataFrame): patients table; ids are 1..len(patients).
        appointment_slots (DataFrame): slots table; ids are
            1..len(appointment_slots).
        n (int): number of appointments drawn before de-duplication.
        seed (int): seed for ``random.seed``.
        past_ratio (float): probability that an appointment is in the past.
        past_days (int): maximum number of days back.
        future_days (int): maximum number of days ahead.
        reference_date (datetime.date | None): the day treated as "today".
            ``None`` (default) uses the current date. Pass a fixed date to
            make the output fully reproducible.

    Returns:
        DataFrame: appointments with the columns patient_id, slot_id,
        appointment_date, appointment_status and a fresh 0..k-1 index.
    """
    random.seed(seed)

    # Read the clock once so every row is measured against the same day, even
    # if the call straddles midnight.
    today = datetime.today().date() if reference_date is None else reference_date

    patient_ids = list(range(1, len(patients) + 1))
    slot_ids = list(range(1, len(appointment_slots) + 1))

    appointments = []
    for _ in range(n):
        patient_id = random.choice(patient_ids)
        slot_id = random.choice(slot_ids)

        # Decide whether this appointment lies in the past or the future.
        if random.random() < past_ratio:
            delta_days = -random.randint(1, past_days)  # go back in time
        else:
            delta_days = random.randint(1, future_days)  # go forward in time
        appointment_date = today + timedelta(days=delta_days)

        # The status depends on which side of the reference day the date is.
        if appointment_date < today:
            status = random.choices(['Completed', 'Cancelled'], weights=[0.8, 0.2])[0]
        else:
            status = random.choices(['Scheduled', 'Cancelled'], weights=[0.9, 0.1])[0]

        appointments.append({
            'patient_id': patient_id,
            'slot_id': slot_id,
            'appointment_date': appointment_date,
            'appointment_status': status
        })

    # Naming the columns keeps drop_duplicates working when n == 0 (a frame
    # built from an empty list has no columns otherwise).
    df = pd.DataFrame(
        appointments,
        columns=['patient_id', 'slot_id', 'appointment_date', 'appointment_status'],
    )
    # Drop rows that reuse the same slot on the same date.
    df = df.drop_duplicates(subset=['slot_id', 'appointment_date'])
    return df.reset_index(drop=True)
|---|---|---|---|---|
| 0 | 7 | 275 | 2025-10-13 | Completed |
| 1 | 5 | 389 | 2025-10-18 | Completed |
| 2 | 7 | 164 | 2025-10-04 | Completed |
| 3 | 32 | 168 | 2025-10-12 | Completed |
| 4 | 77 | 387 | 2025-10-19 | Completed |
| 5 | 14 | 45 | 2025-10-30 | Completed |
| 6 | 3 | 299 | 2025-11-28 | Scheduled |
| 7 | 61 | 38 | 2025-11-20 | Scheduled |
| 8 | 62 | 212 | 2025-11-30 | Scheduled |
| 9 | 41 | 13 | 2025-09-20 | Completed |
| 10 | 56 | 552 | 2025-09-27 | Completed |
| 11 | 63 | 534 | 2025-10-16 | Completed |
| 12 | 97 | 186 | 2025-11-12 | Cancelled |
| 13 | 85 | 269 | 2025-10-18 | Completed |
| 14 | 10 | 468 | 2025-09-20 | Completed |
| 15 | 25 | 91 | 2025-11-21 | Scheduled |
| 16 | 46 | 235 | 2025-10-07 | Completed |
| 17 | 8 | 295 | 2025-12-04 | Scheduled |
| 18 | 30 | 558 | 2025-10-28 | Completed |
| 19 | 69 | 456 | 2025-10-10 | Completed |
| 20 | 18 | 468 | 2025-10-23 | Completed |
| 21 | 88 | 279 | 2025-11-30 | Scheduled |
| 22 | 14 | 65 | 2025-09-24 | Completed |
| 23 | 56 | 9 | 2025-11-22 | Scheduled |
| 24 | 89 | 414 | 2025-10-19 | Completed |
| 25 | 61 | 34 | 2025-11-20 | Scheduled |
| 26 | 4 | 168 | 2025-11-23 | Scheduled |
| 27 | 87 | 148 | 2025-10-24 | Completed |
| 28 | 81 | 59 | 2025-11-22 | Scheduled |
| 29 | 19 | 299 | 2025-11-22 | Scheduled |
| 30 | 69 | 346 | 2025-09-29 | Completed |
| 31 | 76 | 396 | 2025-09-22 | Completed |
| 32 | 86 | 22 | 2025-09-14 | Completed |
| 33 | 93 | 462 | 2025-09-19 | Completed |
| 34 | 21 | 535 | 2025-10-06 | Completed |
| 35 | 18 | 352 | 2025-10-05 | Completed |
| 36 | 29 | 584 | 2025-10-22 | Cancelled |
| 37 | 32 | 209 | 2025-11-18 | Scheduled |
| 38 | 65 | 83 | 2025-11-26 | Scheduled |
| 39 | 3 | 199 | 2025-10-25 | Completed |
| 40 | 41 | 86 | 2025-10-05 | Completed |
| 41 | 69 | 139 | 2025-11-25 | Scheduled |
| 42 | 51 | 29 | 2025-09-16 | Cancelled |
| 43 | 63 | 349 | 2025-10-25 | Cancelled |
| 44 | 17 | 7 | 2025-12-05 | Scheduled |
| 45 | 64 | 352 | 2025-11-01 | Completed |
| 46 | 47 | 472 | 2025-09-27 | Cancelled |
| 47 | 41 | 368 | 2025-10-11 | Completed |
| 48 | 97 | 198 | 2025-11-15 | Scheduled |
| 49 | 8 | 425 | 2025-10-23 | Cancelled |
def create_medical_records(appointments, seed=None):
    """Generate one dummy medical record per completed appointment.

    Args:
        appointments: DataFrame with an ``appointment_status`` column. Only
            rows whose status is ``"Completed"`` get a record;
            ``appointment_id`` is the row's index label plus one.
        seed: Optional seed for the file-level ``fake`` Faker instance. With
            the default ``None`` the instance is not re-seeded, so the text
            depends on whatever state ``fake`` is already in.

    Returns:
        pandas.DataFrame with the columns ``appointment_id``, ``diagnosis``
        and ``treatment``. The columns are present even when there are no
        completed appointments.
    """
    if seed is not None:
        fake.seed_instance(seed)

    # Keep only appointments that have already been completed.
    completed_appointments = appointments[appointments["appointment_status"] == "Completed"]

    records = [
        {
            "appointment_id": index + 1,
            "diagnosis": fake.text(max_nb_chars=50),  # random sentence(s)
            "treatment": fake.text(max_nb_chars=60),
        }
        for index in completed_appointments.index
    ]
    return pd.DataFrame(
        records, columns=["appointment_id", "diagnosis", "treatment"]
    )
|---|---|---|---|
| 0 | 1 | Quae nihil architecto ex fuga ipsam. | Voluptas veniam ex modi quam. Iure quae provid... |
| 1 | 2 | Architecto non ducimus culpa saepe at cum. | Voluptates et cum dolorum odio dignissimos. |
| 2 | 3 | Non nobis eos dolorem fugit inventore nam totam. | Ducimus ducimus exercitationem inventore et fa... |
| 3 | 4 | Rem laudantium odit ratione nulla sint odio. | Distinctio ullam ab recusandae impedit unde do... |
| 4 | 5 | Saepe alias laboriosam. | Expedita autem sapiente sint quidem. |
| 5 | 6 | Soluta voluptatem culpa eos. | Quibusdam quibusdam labore magni fuga labore i... |
| 6 | 10 | Similique illo quia laboriosam inventore quo. | Saepe adipisci cum illum. Est quisquam illo. |
| 7 | 11 | Cum soluta dolorem facere numquam expedita. | Tenetur quam iusto cum quisquam. |
| 8 | 12 | Eius id nemo expedita optio officiis. | Ex reprehenderit sapiente debitis. |
| 9 | 14 | Vel saepe soluta tempore. | Non hic adipisci doloremque voluptas. Rem ad a... |
| 10 | 15 | Saepe porro vitae ad eius tenetur sequi. | Facilis corporis cumque. Eum sed voluptates ex... |
| 11 | 17 | Doloremque veritatis esse laboriosam et possimus. | Ipsum voluptates eveniet fugiat vel reprehende... |
| 12 | 19 | Reprehenderit esse assumenda ducimus. | Necessitatibus doloremque illum vel iusto aspe... |
| 13 | 20 | Laborum illo amet corrupti. | Quaerat reprehenderit molestiae non numquam ex... |
| 14 | 21 | Saepe perspiciatis laborum repudiandae. | Sunt nemo cum aut. Saepe reiciendis exercitati... |
| 15 | 23 | At occaecati vero vel. | Saepe laudantium voluptas quia sed. |
| 16 | 25 | A tenetur illum. | Ea tempore quo doloribus. Numquam iure recusan... |
| 17 | 28 | Nihil animi eius recusandae ut debitis. | Modi aperiam totam. Numquam animi quia. |
| 18 | 31 | Assumenda quam voluptates voluptate et eos. | Quibusdam modi labore quaerat voluptas. |
| 19 | 32 | Unde accusamus quidem aliquid nam impedit. | Ipsam numquam occaecati ipsum tempora fugiat. |
| 20 | 33 | Reiciendis odio qui itaque. | Facere eligendi eligendi harum. |
| 21 | 34 | Nihil nisi at consequuntur ipsum. | Maxime ullam culpa eius velit laboriosam iure. |
| 22 | 35 | Odit totam cum in temporibus accusamus. | Quos mollitia laboriosam. |
| 23 | 36 | Excepturi quia accusantium assumenda. | Praesentium illum eius corrupti tempore vel. |
| 24 | 40 | Libero pariatur facilis natus porro praesentium. | Non possimus harum pariatur. |
| 25 | 41 | Fugiat officiis quo. | Deserunt provident commodi accusamus atque. |
| 26 | 46 | Omnis similique excepturi. | In pariatur reprehenderit voluptatibus dolores. |
| 27 | 48 | Fuga soluta quia deserunt nihil. | Eaque voluptatum praesentium possimus. |
# Write each generated table to a CSV file (relative path) without the
# DataFrame index column.
csv_exports = (
    ("patients.csv", patients),
    ("hospitals.csv", hospitals),
    ("specializations.csv", specializations),
    ("doctors.csv", doctors),
    ("doctor_schedule.csv", doctor_schedule),
    ("appointment_slots.csv", appointment_slots),
    ("appointments.csv", appointments),
    ("medical_records.csv", medical_records),
)
for csv_name, table in csv_exports:
    table.to_csv(csv_name, index=False)