!pip install faker
Requirement already satisfied: faker in /usr/local/lib/python3.12/dist-packages (37.12.0)
Requirement already satisfied: tzdata in /usr/local/lib/python3.12/dist-packages (from faker) (2025.2)
from faker import Faker
import random
import pandas as pd
from datetime import datetime, timedelta, time
# Shared Faker generator configured with the 'id_ID' locale; the create_*
# functions that need fake names, addresses or filler text draw from it and
# re-seed it via fake.seed_instance().
fake = Faker('id_ID')
def create_hospital_data(seed=123, n=2):
    """Build a table of fake hospitals.

    Args:
        seed: Value passed to ``fake.seed_instance`` so that repeated calls
            return the same rows.
        n: Number of hospitals to generate. Defaults to 2, the count that
            used to be hard-coded.

    Returns:
        DataFrame with ``n`` rows and two columns: ``hospital_name`` (a fake
        last name prefixed with ``'RS '``) and ``hospital_address`` (a fake
        address whose line breaks are replaced by ``', '``).
    """
    fake.seed_instance(seed)

    # Exactly one generator draw per value. The previous code made a second,
    # discarded call for every row, which only burned generator state; the
    # values produced for a given seed therefore differ from that version.
    names = ['RS ' + fake.last_name() for _ in range(n)]
    addresses = [fake.address().replace("\n", ", ") for _ in range(n)]

    return pd.DataFrame({'hospital_name': names,
                         'hospital_address': addresses})
# Generate the hospital table with the default seed. The bare name on the
# second line is what renders the table when this runs as a notebook cell.
hospitals = create_hospital_data()
hospitals
hospital_name hospital_address
0 RS Santoso Gg. Cikutra Timur No. 571, Bekasi, Jawa Tengah...
1 RS Wasita Jalan H.J Maemunah No. 174, Magelang, NB 17695
def create_specializations(seed=123):
    """Build the lookup table of medical specializations.

    Args:
        seed: Value passed to ``fake.seed_instance`` so the filler
            descriptions are the same on every call.

    Returns:
        DataFrame with one row per specialization and the columns
        ``specialization_name`` and ``specialization_desc`` (filler text
        requested with ``max_nb_chars=50``).
    """
    fake.seed_instance(seed)

    specialization_name = ['General Practitioners', 'Pediatricians',
                           'Cardiologists', 'Dermatologists', 'Orthopedic']

    # Ask for exactly one description per name so the two columns cannot get
    # out of step if the list above is edited.
    specialization_desc = fake.texts(nb_texts=len(specialization_name),
                                     max_nb_chars=50)

    return pd.DataFrame({
        'specialization_name': specialization_name,
        # The key used to contain a stray space ('specialization _desc'),
        # which ended up verbatim in the column name and the CSV header.
        'specialization_desc': specialization_desc,
    })
# Generate the specialization lookup table with the default seed and show it
# as the cell output.
specializations = create_specializations()
specializations
specialization_name specialization _desc
0 General Practitioners Eaque quisquam eaque. Fugit natus exercitationem.
1 Pediatricians Cum temporibus quo soluta fugiat.
2 Cardiologists Pariatur commodi tenetur eos.
3 Dermatologists Reprehenderit odit natus vero accusamus.
4 Orthopedic Mollitia sunt quam harum quod accusamus.
def create_patients(seed=123, n=100):
    """Generate ``n`` fake patient rows.

    Both the shared ``fake`` generator and the ``random`` module are seeded
    with ``seed``, so the same arguments give the same names, genders and
    contact numbers. Birthdates come from ``fake.date_of_birth`` with an age
    range of 15 to 90.

    Args:
        seed: Seed applied to ``fake`` and to ``random``.
        n: Number of patients to create.

    Returns:
        DataFrame with the columns ``patient_name``, ``patient_gender``,
        ``patient_birthdate`` and ``patient_contact``.
    """
    fake.seed_instance(seed)
    random.seed(seed)

    # Every column is produced in full before the next one starts; that order
    # determines which values a given seed yields, so it must not change.
    full_names = []
    for _ in range(n):
        given = fake.first_name()
        family = fake.last_name()
        full_names.append(f"{given} {family}")

    sexes = [random.choice(['Female', 'Male']) for _ in range(n)]

    dobs = [
        fake.date_of_birth(minimum_age=15, maximum_age=90)
        for _ in range(n)
    ]

    # Contact number: the prefix '08' followed by ten random digits.
    phones = [f"08{fake.numerify(text='##########')}" for _ in range(n)]

    frame = pd.DataFrame({
        'patient_name': full_names,
        'patient_gender': sexes,
        'patient_birthdate': dobs,
        'patient_contact': phones,
    })
    return frame
# Generate the patient table with the defaults (seed=123, n=100) and show it
# as the cell output.
patients = create_patients()
patients
patient_name patient_gender patient_birthdate patient_contact
0 Bala Santoso Female 1977-02-26 089490563144
1 Cayadi Wasita Male 1960-12-19 082656090723
2 Kasiran Kurniawan Female 1942-10-26 080482834773
3 Bahuwarna Wibowo Male 1990-01-21 084811798843
4 Gasti Purwanti Male 1951-12-13 088166710966
... ... ... ... ...
95 Uli Hartati Female 1962-12-16 081993230755
96 Lulut Aryani Male 2006-06-05 083668184629
97 Asman Manullang Female 1980-04-12 086253589902
98 Endah Simbolon Male 1937-04-19 086571070906
99 Dina Mulyani Female 1973-03-02 081212534333

100 rows × 4 columns

def create_doctors(seed=202, n=25, hospitals=None, specializations=None):
    """Generate ``n`` fake doctors linked to a hospital and a specialization.

    ``hospital_id`` and ``specialization_id`` are random integers between 1
    and the length of the corresponding table (inclusive), i.e. 1-based row
    positions in ``hospitals`` / ``specializations``.

    Args:
        seed: Seed applied to the shared ``fake`` generator and to ``random``.
        n: Number of doctors to create.
        hospitals: Table of hospitals; only its length is used. Required even
            though the signature defaults it to ``None``.
        specializations: Table of specializations; only its length is used.
            Required even though the signature defaults it to ``None``.

    Returns:
        DataFrame with the columns ``hospital_id``, ``specialization_id`` and
        ``doctor_name``.

    Raises:
        TypeError: If ``hospitals`` or ``specializations`` is ``None``.
        ValueError: If either table is empty while ``n`` is positive, because
            there would be no valid id to pick.
    """
    # Fail early with a message that names the offending argument. Before,
    # the same situations surfaced as "object of type 'NoneType' has no len()"
    # or randint's "empty range" error from deep inside the function.
    for label, table in (('hospitals', hospitals),
                         ('specializations', specializations)):
        if table is None:
            raise TypeError(
                f"create_doctors() requires the '{label}' table, got None")
        if len(table) == 0 and n > 0:
            raise ValueError(
                f"'{label}' must contain at least one row to assign ids from")

    fake.seed_instance(seed)
    random.seed(seed)

    # Names first, then hospital ids, then specialization ids: this order
    # fixes the values a given seed produces.
    names = [fake.first_name() + " " + fake.last_name() for _ in range(n)]
    hospital_id = [random.randint(1, len(hospitals)) for _ in range(n)]
    specialization_id = [random.randint(1, len(specializations))
                         for _ in range(n)]

    return pd.DataFrame({'hospital_id': hospital_id,
                         'specialization_id': specialization_id,
                         'doctor_name': names})
# Generate the doctor table; the hospital and specialization tables are
# passed in so the generated ids stay within their row counts.
doctors = create_doctors(hospitals=hospitals, specializations=specializations)
doctors
hospital_id specialization_id doctor_name
0 2 4 Prasetya Yuliarti
1 2 4 Wakiman Wasita
2 2 4 Harto Napitupulu
3 1 3 Vicky Widodo
4 1 1 Balidin Aryani
5 2 2 Timbul Hartati
6 1 1 Bagus Nuraini
7 2 1 Martana Pangestu
8 2 4 Jaswadi Lailasari
9 2 3 Rini Suryatmi
10 1 1 Syahrini Nurdiyanti
11 2 2 Harjo Wacana
12 1 2 Martani Kusmawati
13 1 2 Edi Anggraini
14 2 1 Wadi Wahyuni
15 1 3 Bagya Nurdiyanti
16 2 2 Jamal Rajasa
17 2 2 Tugiman Halimah
18 2 2 Azalea Saputra
19 1 4 Bakijan Nugroho
20 2 2 Ratih Januar
21 2 3 Bahuwarna Wibowo
22 1 3 Karma Anggriawan
23 1 2 Garang Prabowo
24 2 3 Hadi Maheswara
def create_doctor_schedule(doctors, seed=123):
    """Give every doctor three random working days with one shift each.

    For each row of ``doctors`` three distinct days are sampled from Monday
    to Saturday, and each of those days gets one of four fixed four-hour
    shifts (08-12, 13-17, 09-13, 10-14) chosen at random.

    Args:
        doctors: Table of doctors; only its index is used. ``doctor_id`` is
            the index label plus one.
        seed: Seed applied to ``random`` so the schedule is reproducible.

    Returns:
        DataFrame with the columns ``doctor_id``, ``day_of_week``,
        ``start_time`` and ``end_time``, three rows per doctor.
    """
    random.seed(seed)

    weekdays = ['Monday', 'Tuesday', 'Wednesday',
                'Thursday', 'Friday', 'Saturday']
    # Every shift lasts four hours; only the opening hour differs.
    shifts = [(time(hour, 0), time(hour + 4, 0)) for hour in (8, 13, 9, 10)]

    rows = []
    for label in doctors.index:
        # Three distinct days, drawn before any of their shifts are chosen.
        chosen_days = random.sample(weekdays, 3)
        for weekday in chosen_days:
            opens, closes = random.choice(shifts)
            rows.append(dict(
                doctor_id=label + 1,
                day_of_week=weekday,
                start_time=opens,
                end_time=closes,
            ))

    return pd.DataFrame(rows)
# Build the weekly schedule for every generated doctor (default seed) and
# show it as the cell output.
doctor_schedule = create_doctor_schedule(doctors)
doctor_schedule
doctor_id day_of_week start_time end_time
0 1 Monday 10:00:00 14:00:00
1 1 Wednesday 09:00:00 13:00:00
2 1 Saturday 08:00:00 12:00:00
3 2 Monday 09:00:00 13:00:00
4 2 Thursday 08:00:00 12:00:00
... ... ... ... ...
70 24 Monday 10:00:00 14:00:00
71 24 Tuesday 08:00:00 12:00:00
72 25 Saturday 08:00:00 12:00:00
73 25 Tuesday 09:00:00 13:00:00
74 25 Thursday 10:00:00 14:00:00

75 rows × 4 columns

def create_appointment_slots(doctor_schedule, slot_duration_minutes=30):
    """Split every schedule row into consecutive fixed-length slots.

    Slots start at the row's ``start_time`` and follow one another without
    gaps. A trailing slot that would end after ``end_time`` is dropped, and a
    row whose ``end_time`` is not after its ``start_time`` yields no slots.

    Args:
        doctor_schedule: Table with ``start_time`` and ``end_time`` columns
            holding ``datetime.time`` values. ``schedule_id`` is the row's
            index label plus one.
        slot_duration_minutes: Length of each slot in minutes; must be
            positive.

    Returns:
        DataFrame with the columns ``schedule_id``, ``slot_time_start`` and
        ``slot_time_end``. The columns are present even when no slot is
        produced.

    Raises:
        ValueError: If ``slot_duration_minutes`` is not positive (the loop
            could never advance).
    """
    if slot_duration_minutes <= 0:
        raise ValueError("slot_duration_minutes must be positive")

    step = timedelta(minutes=slot_duration_minutes)
    # Slot arithmetic is done on full datetimes pinned to an arbitrary fixed
    # day. Comparing bare time-of-day values wraps at midnight: a shift such
    # as 23:00-23:45 used to emit a bogus 23:30-00:00 slot and then loop
    # forever because 00:00 compares as earlier than the shift end.
    anchor = datetime(2000, 1, 1).date()

    slots = []
    for index, sched in doctor_schedule.iterrows():
        slot_start = datetime.combine(anchor, sched['start_time'])
        shift_end = datetime.combine(anchor, sched['end_time'])

        # Keep only slots that fit completely inside the shift.
        while slot_start + step <= shift_end:
            slot_end = slot_start + step
            slots.append({
                'schedule_id': index + 1,
                'slot_time_start': slot_start.time(),
                'slot_time_end': slot_end.time(),
            })
            slot_start = slot_end

    return pd.DataFrame(
        slots, columns=['schedule_id', 'slot_time_start', 'slot_time_end'])
# Expand every schedule row into slots of the default length (30 minutes)
# and show the result as the cell output.
appointment_slots = create_appointment_slots(doctor_schedule)
appointment_slots
schedule_id slot_time_start slot_time_end
0 1 10:00:00 10:30:00
1 1 10:30:00 11:00:00
2 1 11:00:00 11:30:00
3 1 11:30:00 12:00:00
4 1 12:00:00 12:30:00
... ... ... ...
595 75 11:30:00 12:00:00
596 75 12:00:00 12:30:00
597 75 12:30:00 13:00:00
598 75 13:00:00 13:30:00
599 75 13:30:00 14:00:00

600 rows × 3 columns

from datetime import datetime, timedelta
import random
import pandas as pd

def create_appointments(patients, appointment_slots, n=50, seed=123,
                        past_ratio=0.7, past_days=60, future_days=30,
                        reference_date=None):
    """Generate dummy rows for the appointments table.

    Each candidate row pairs a random patient with a random slot on a date
    that lies either 1..``past_days`` days before or 1..``future_days`` days
    after the reference date (never on the reference date itself). Past
    appointments are 'Completed' or 'Cancelled' (weights 0.8 / 0.2); future
    ones are 'Scheduled' or 'Cancelled' (weights 0.9 / 0.1).

    Args:
        patients (DataFrame): Patient table; only its length is used.
        appointment_slots (DataFrame): Slot table; only its length is used.
        n (int): Number of candidate appointments to draw.
        seed (int): Seed applied to ``random`` for reproducibility.
        past_ratio (float): Probability that a candidate lies in the past.
        past_days (int): How many days back a past appointment may be.
        future_days (int): How many days ahead a future appointment may be.
        reference_date (date | datetime | None): The day treated as "today".
            Defaults to the current date, which makes the dates depend on
            when the function is run; pass a fixed date to get output that is
            fully determined by ``seed``.

    Returns:
        DataFrame with the columns ``patient_id``, ``slot_id``,
        ``appointment_date`` and ``appointment_status``. Candidates that
        repeat an existing (slot_id, appointment_date) pair are dropped, so
        the result has at most ``n`` rows. The columns are present even when
        the result is empty.
    """
    random.seed(seed)

    # Resolve "today" once so that date generation and the past/future status
    # check below can never disagree (e.g. when a run crosses midnight).
    if reference_date is None:
        today = datetime.today().date()
    elif isinstance(reference_date, datetime):
        today = reference_date.date()
    else:
        today = reference_date

    appointments = []
    patient_ids = list(range(1, len(patients) + 1))
    slot_ids = list(range(1, len(appointment_slots) + 1))

    for _ in range(n):
        patient_id = random.choice(patient_ids)
        slot_id = random.choice(slot_ids)

        # Decide whether this appointment lies in the past or the future.
        if random.random() < past_ratio:
            delta_days = -random.randint(1, past_days)  # go back in time
        else:
            delta_days = random.randint(1, future_days)  # go forward

        appointment_date = today + timedelta(days=delta_days)

        # The allowed statuses depend on which side of today the date falls.
        if appointment_date < today:
            status = random.choices(['Completed', 'Cancelled'],
                                    weights=[0.8, 0.2])[0]
        else:
            status = random.choices(['Scheduled', 'Cancelled'],
                                    weights=[0.9, 0.1])[0]

        appointments.append({
            'patient_id': patient_id,
            'slot_id': slot_id,
            'appointment_date': appointment_date,
            'appointment_status': status
        })

    # A slot can be booked only once per date: keep the first candidate for
    # every (slot_id, appointment_date) pair. Naming the columns explicitly
    # keeps the schema intact when n == 0.
    df = pd.DataFrame(
        appointments,
        columns=['patient_id', 'slot_id',
                 'appointment_date', 'appointment_status'],
    ).drop_duplicates(subset=['slot_id', 'appointment_date'])
    return df.reset_index(drop=True)
# Generate appointments with the default settings. The dates are relative to
# the day this cell is run, so they differ between runs on different days.
appointments = create_appointments(patients, appointment_slots)
appointments
patient_id slot_id appointment_date appointment_status
0 7 275 2025-10-13 Completed
1 5 389 2025-10-18 Completed
2 7 164 2025-10-04 Completed
3 32 168 2025-10-12 Completed
4 77 387 2025-10-19 Completed
5 14 45 2025-10-30 Completed
6 3 299 2025-11-28 Scheduled
7 61 38 2025-11-20 Scheduled
8 62 212 2025-11-30 Scheduled
9 41 13 2025-09-20 Completed
10 56 552 2025-09-27 Completed
11 63 534 2025-10-16 Completed
12 97 186 2025-11-12 Cancelled
13 85 269 2025-10-18 Completed
14 10 468 2025-09-20 Completed
15 25 91 2025-11-21 Scheduled
16 46 235 2025-10-07 Completed
17 8 295 2025-12-04 Scheduled
18 30 558 2025-10-28 Completed
19 69 456 2025-10-10 Completed
20 18 468 2025-10-23 Completed
21 88 279 2025-11-30 Scheduled
22 14 65 2025-09-24 Completed
23 56 9 2025-11-22 Scheduled
24 89 414 2025-10-19 Completed
25 61 34 2025-11-20 Scheduled
26 4 168 2025-11-23 Scheduled
27 87 148 2025-10-24 Completed
28 81 59 2025-11-22 Scheduled
29 19 299 2025-11-22 Scheduled
30 69 346 2025-09-29 Completed
31 76 396 2025-09-22 Completed
32 86 22 2025-09-14 Completed
33 93 462 2025-09-19 Completed
34 21 535 2025-10-06 Completed
35 18 352 2025-10-05 Completed
36 29 584 2025-10-22 Cancelled
37 32 209 2025-11-18 Scheduled
38 65 83 2025-11-26 Scheduled
39 3 199 2025-10-25 Completed
40 41 86 2025-10-05 Completed
41 69 139 2025-11-25 Scheduled
42 51 29 2025-09-16 Cancelled
43 63 349 2025-10-25 Cancelled
44 17 7 2025-12-05 Scheduled
45 64 352 2025-11-01 Completed
46 47 472 2025-09-27 Cancelled
47 41 368 2025-10-11 Completed
48 97 198 2025-11-15 Scheduled
49 8 425 2025-10-23 Cancelled
def create_medical_records(appointments, seed=None):
    """Create one medical record for every completed appointment.

    Args:
        appointments: Table with an ``appointment_status`` column.
            ``appointment_id`` is the row's index label plus one.
        seed: Optional seed for the shared ``fake`` generator. With the
            default ``None`` the generator is not re-seeded and the text
            continues from whatever state it is currently in; pass a value
            to make the records reproducible on their own.

    Returns:
        DataFrame with the columns ``appointment_id``, ``diagnosis`` and
        ``treatment`` (filler text requested with ``max_nb_chars`` of 50 and
        60). The columns are present even when no appointment is completed.
    """
    if seed is not None:
        fake.seed_instance(seed)

    # Only appointments that actually took place get a record.
    completed = appointments[appointments["appointment_status"] == "Completed"]

    # Per row the diagnosis text is drawn before the treatment text.
    records = [
        {
            "appointment_id": index + 1,
            "diagnosis": fake.text(max_nb_chars=50),
            "treatment": fake.text(max_nb_chars=60),
        }
        for index in completed.index
    ]

    return pd.DataFrame(
        records, columns=["appointment_id", "diagnosis", "treatment"])
# Create medical records for the completed appointments and show them as the
# cell output.
medical_records = create_medical_records(appointments)
medical_records
appointment_id diagnosis treatment
0 1 Quae nihil architecto ex fuga ipsam. Voluptas veniam ex modi quam. Iure quae provid...
1 2 Architecto non ducimus culpa saepe at cum. Voluptates et cum dolorum odio dignissimos.
2 3 Non nobis eos dolorem fugit inventore nam totam. Ducimus ducimus exercitationem inventore et fa...
3 4 Rem laudantium odit ratione nulla sint odio. Distinctio ullam ab recusandae impedit unde do...
4 5 Saepe alias laboriosam. Expedita autem sapiente sint quidem.
5 6 Soluta voluptatem culpa eos. Quibusdam quibusdam labore magni fuga labore i...
6 10 Similique illo quia laboriosam inventore quo. Saepe adipisci cum illum. Est quisquam illo.
7 11 Cum soluta dolorem facere numquam expedita. Tenetur quam iusto cum quisquam.
8 12 Eius id nemo expedita optio officiis. Ex reprehenderit sapiente debitis.
9 14 Vel saepe soluta tempore. Non hic adipisci doloremque voluptas. Rem ad a...
10 15 Saepe porro vitae ad eius tenetur sequi. Facilis corporis cumque. Eum sed voluptates ex...
11 17 Doloremque veritatis esse laboriosam et possimus. Ipsum voluptates eveniet fugiat vel reprehende...
12 19 Reprehenderit esse assumenda ducimus. Necessitatibus doloremque illum vel iusto aspe...
13 20 Laborum illo amet corrupti. Quaerat reprehenderit molestiae non numquam ex...
14 21 Saepe perspiciatis laborum repudiandae. Sunt nemo cum aut. Saepe reiciendis exercitati...
15 23 At occaecati vero vel. Saepe laudantium voluptas quia sed.
16 25 A tenetur illum. Ea tempore quo doloribus. Numquam iure recusan...
17 28 Nihil animi eius recusandae ut debitis. Modi aperiam totam. Numquam animi quia.
18 31 Assumenda quam voluptates voluptate et eos. Quibusdam modi labore quaerat voluptas.
19 32 Unde accusamus quidem aliquid nam impedit. Ipsam numquam occaecati ipsum tempora fugiat.
20 33 Reiciendis odio qui itaque. Facere eligendi eligendi harum.
21 34 Nihil nisi at consequuntur ipsum. Maxime ullam culpa eius velit laboriosam iure.
22 35 Odit totam cum in temporibus accusamus. Quos mollitia laboriosam.
23 36 Excepturi quia accusantium assumenda. Praesentium illum eius corrupti tempore vel.
24 40 Libero pariatur facilis natus porro praesentium. Non possimus harum pariatur.
25 41 Fugiat officiis quo. Deserunt provident commodi accusamus atque.
26 46 Omnis similique excepturi. In pariatur reprehenderit voluptatibus dolores.
27 48 Fuga soluta quia deserunt nihil. Eaque voluptatum praesentium possimus.
# Write every generated table to a CSV file in the working directory, without
# the DataFrame index. Each file is written exactly once; the appointments and
# medical_records exports used to be repeated at the end.
for csv_path, table in (
    ("patients.csv", patients),
    ("hospitals.csv", hospitals),
    ("specializations.csv", specializations),
    ("doctors.csv", doctors),
    ("doctor_schedule.csv", doctor_schedule),
    ("appointment_slots.csv", appointment_slots),
    ("appointments.csv", appointments),
    ("medical_records.csv", medical_records),
):
    table.to_csv(csv_path, index=False)