Test 1

Tue 01 July 2025
print("Hello World")
Hello World
def solve():
    try:
        n_input = input("Enter number of elements: ").strip()
        if not n_input:
            print("No input provided for number of elements.")
            return  
        n = int(n_input)  
        arr_input = input("Enter the array elements separated by space: ").strip()
        if not arr_input:
            print("No array elements provided.")
            return
        arr = list(map(int, arr_input.split()))
        if len(arr) != n:
            print(f"You entered {len(arr)} elements, but expected {n}.")
            return
        max_ending_here = max_so_far = arr[0]
        for x in arr[1:]:
            max_ending_here = max(x, max_ending_here + x)
            max_so_far = max(max_so_far, max_ending_here)
        print("Maximum Subarray Sum:", max_so_far)
    except ValueError as ve:
        print("Invalid input! Please enter integers only.")
        print("Error details:", ve)
solve()
Enter number of elements:  2
Enter the array elements separated by space:  10 10


Maximum Subarray Sum: 20
import bisect
def solve():
    try:
        arr_input = input().strip()
        if not arr_input:
            return
        arr = list(map(int, arr_input.split()))
        if not arr:
            return
        sub = []
        for num in arr:
            idx = bisect.bisect_left(sub, num)
            if idx == len(sub):
                sub.append(num)
            else:
                sub[idx] = num
        print(len(sub))
    except ValueError:
        print("Invalid input")
solve()
 10 9 2 5 3 7 101 18


4
def solve():
    try:
        s1 = input().strip()
        s2 = input().strip()
        n, m = len(s1), len(s2)
        dp = [[0]*(m+1) for _ in range(n+1)]
        for i in range(1, n+1):
            for j in range(1, m+1):
                if s1[i-1] == s2[j-1]:
                    dp[i][j] = dp[i-1][j-1] + 1
                else:
                    dp[i][j] = max(dp[i-1][j], dp[i][j-1])
        print(dp[n][m])
    except:
        print("Invalid input")
solve()
 Stefina
 Ai Engineer


2
def solve():
    try:
        arr = list(map(int, input().strip().split()))
        target = int(input().strip())
        n = len(arr)
        dp = [[False]*(target+1) for _ in range(n+1)]
        for i in range(n+1):
            dp[i][0] = True
        for i in range(1, n+1):
            for j in range(1, target+1):
                if arr[i-1] > j:
                    dp[i][j] = dp[i-1][j]
                else:
                    dp[i][j] = dp[i-1][j] or dp[i-1][j-arr[i-1]]
        print("Yes" if dp[n][target] else "No")
    except:
        print("Invalid input")

solve()
 1  2 3 4 5
 7


Yes
def solve():
    arr = list(map(int, input().strip().split()))
    count = 0
    candidate = None
    for num in arr:
        if count == 0:
            candidate = num
        count += (1 if num == candidate else -1)
    print(candidate)
solve()
 2 2 3 4 2 5 2 6


2
def solve():
    arr = list(map(int, input().strip().split()))
    target = int(input().strip())
    seen = {}
    for i, num in enumerate(arr):
        diff = target - num
        if diff in seen:
            print(seen[diff], i)
            return
        seen[num] = i
    print("No pair")
solve()
 1 2 3 4
 5


1 2
def solve():
    s = input().strip()
    print("Yes" if s == s[::-1] else "No")
solve()
 madam


Yes
def solve():
    s1 = input().strip()
    s2 = input().strip()
    print("Yes" if sorted(s1) == sorted(s2) else "No")
solve()
 listen
 silent


Yes
def solve():
    arr = list(map(int, input().strip().split()))
    n = len(arr)
    total = n * (n + 1) // 2
    print(total - sum(arr))
solve()
 3 0 1


2
def solve():
    from collections import defaultdict
    def dfs(v):
        visited[v] = True
        recStack[v] = True
        for neighbor in graph[v]:
            if not visited[neighbor] and dfs(neighbor):
                return True
            elif recStack[neighbor]:
                return True
        recStack[v] = False
        return False

    n, e = map(int, input().split())
    graph = defaultdict(list)
    for _ in range(e):
        u, v = map(int, input().split())
        graph[u].append(v)

    visited = [False]*n
    recStack = [False]*n
    for node in range(n):
        if not visited[node]:
            if dfs(node):
                print("Yes")
                return
    print("No")
solve()
 4 4
 0 1
 1 2
 2 3
 3 1


Yes
def solve():
    n = int(input())
    print(bin(n).count('1'))
solve()
 7


3
def solve():
    s = input().strip()
    from collections import Counter
    count = Counter(s)
    for ch in s:
        if count[ch] == 1:
            print(ch)
            return
    print("None")
solve()
 a b c a b d d


c
def solve():
    n = int(input())
    print("Yes" if n > 0 and (n & (n - 1)) == 0 else "No")
solve()
 16


Yes
def solve():
    import sys
    sys.setrecursionlimit(10000)
    def is_balanced(root):
        if not root:
            return 0, True
        lh, lb = is_balanced(root[1])
        rh, rb = is_balanced(root[2])
        balanced = lb and rb and abs(lh - rh) <= 1
        return max(lh, rh) + 1, balanced

    # Input format: (val, left_subtree, right_subtree) or None
    tree = eval(input())  
    print("Yes" if is_balanced(tree)[1] else "No")
solve()
 (1, (2, None, None), (3, None, None))


Yes
def solve():
    n = int(input())
    intervals = [tuple(map(int, input().split())) for _ in range(n)]
    intervals.sort(key=lambda x: x[1])
    count, end = 0, 0
    for s, e in intervals:
        if s >= end:
            count += 1
            end = e
    print(count)
solve()
 3
 1 3
 2 5
 4 7


2
def solve():
    def is_safe(board, row, col):
        for i in range(row):
            if board[i] == col or abs(board[i] - col) == abs(i - row):
                return False
        return True

    def solve_nq(n):
        def backtrack(row=0):
            if row == n:
                result.append(board[:])
                return
            for col in range(n):
                if is_safe(board, row, col):
                    board[row] = col
                    backtrack(row + 1)
        board = [-1] * n
        result = []
        backtrack()
        return result

    n = int(input())
    solutions = solve_nq(n)
    print(len(solutions))
solve()
 4


2
def solve():
    coins = list(map(int, input().split()))
    amount = int(input())
    dp = [float('inf')] * (amount + 1)
    dp[0] = 0
    for c in coins:
        for i in range(c, amount + 1):
            dp[i] = min(dp[i], dp[i - c] + 1)
    print(dp[amount] if dp[amount] != float('inf') else -1)
solve()
 1 2 5
 11


3
def solve():
    s = input().strip()
    word_dict = set(input().strip().split())
    dp = [False] * (len(s)+1)
    dp[0] = True
    for i in range(1, len(s)+1):
        for j in range(i):
            if dp[j] and s[j:i] in word_dict:
                dp[i] = True
                break
    print("Yes" if dp[-1] else "No")
solve()
 leetcode
 leet code


Yes
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score, roc_curve
df = pd.read_csv("lung_cancer.csv")  # make sure this file is in your JupyterLab folder
df.head()
Name Surname Age Smokes AreaQ Alkhol Result
0 John Wick 35 3 5 4 1
1 John Constantine 27 20 2 5 1
2 Camela Anderson 30 0 5 2 0
3 Alex Telles 28 0 8 1 0
4 Diego Maradona 68 4 5 6 1
!pip install matplotlib seaborn
Collecting matplotlib
  Downloading matplotlib-3.10.3-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.2-cp312-cp312-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.58.4-cp312-cp312-win_amd64.whl.metadata (108 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.8-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Requirement already satisfied: numpy>=1.23 in c:\users\stefi\miniconda3\envs\py312\lib\site-packages (from matplotlib) (2.3.1)
Requirement already satisfied: packaging>=20.0 in c:\users\stefi\miniconda3\envs\py312\lib\site-packages (from matplotlib) (25.0)
Collecting pillow>=8 (from matplotlib)
  Downloading pillow-11.2.1-cp312-cp312-win_amd64.whl.metadata (9.1 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Downloading pyparsing-3.2.3-py3-none-any.whl.metadata (5.0 kB)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\stefi\miniconda3\envs\py312\lib\site-packages (from matplotlib) (2.9.0.post0)
Requirement already satisfied: pandas>=1.2 in c:\users\stefi\miniconda3\envs\py312\lib\site-packages (from seaborn) (2.3.0)
Requirement already satisfied: pytz>=2020.1 in c:\users\stefi\miniconda3\envs\py312\lib\site-packages (from pandas>=1.2->seaborn) (2025.2)
Requirement already satisfied: tzdata>=2022.7 in c:\users\stefi\miniconda3\envs\py312\lib\site-packages (from pandas>=1.2->seaborn) (2025.2)
Requirement already satisfied: six>=1.5 in c:\users\stefi\miniconda3\envs\py312\lib\site-packages (from python-dateutil>=2.7->matplotlib) (1.17.0)
Downloading matplotlib-3.10.3-cp312-cp312-win_amd64.whl (8.1 MB)
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   - -------------------------------------- 0.3/8.1 MB ? eta -:--:--
   --- ------------------------------------ 0.8/8.1 MB 2.1 MB/s eta 0:00:04
   ------ --------------------------------- 1.3/8.1 MB 2.4 MB/s eta 0:00:03
   ------- -------------------------------- 1.6/8.1 MB 2.5 MB/s eta 0:00:03
   ------- -------------------------------- 1.6/8.1 MB 2.5 MB/s eta 0:00:03
   ------- -------------------------------- 1.6/8.1 MB 2.5 MB/s eta 0:00:03
   ------- -------------------------------- 1.6/8.1 MB 2.5 MB/s eta 0:00:03
   ------- -------------------------------- 1.6/8.1 MB 2.5 MB/s eta 0:00:03
   ------- -------------------------------- 1.6/8.1 MB 2.5 MB/s eta 0:00:03
   ------- -------------------------------- 1.6/8.1 MB 2.5 MB/s eta 0:00:03
   ------- -------------------------------- 1.6/8.1 MB 2.5 MB/s eta 0:00:03
   ------- -------------------------------- 1.6/8.1 MB 2.5 MB/s eta 0:00:03
   ------- -------------------------------- 1.6/8.1 MB 2.5 MB/s eta 0:00:03
   ------- -------------------------------- 1.6/8.1 MB 2.5 MB/s eta 0:00:03
   ------- -------------------------------- 1.6/8.1 MB 2.5 MB/s eta 0:00:03
   ------- -------------------------------- 1.6/8.1 MB 2.5 MB/s eta 0:00:03
   --------- ------------------------------ 1.8/8.1 MB 477.1 kB/s eta 0:00:14
   --------- ------------------------------ 1.8/8.1 MB 477.1 kB/s eta 0:00:14
   --------- ------------------------------ 1.8/8.1 MB 477.1 kB/s eta 0:00:14
   --------- ------------------------------ 1.8/8.1 MB 477.1 kB/s eta 0:00:14
   --------- ------------------------------ 1.8/8.1 MB 477.1 kB/s eta 0:00:14
   --------- ------------------------------ 1.8/8.1 MB 477.1 kB/s eta 0:00:14
   --------- ------------------------------ 1.8/8.1 MB 477.1 kB/s eta 0:00:14
   --------- ------------------------------ 1.8/8.1 MB 477.1 kB/s eta 0:00:14
   --------- ------------------------------ 1.8/8.1 MB 477.1 kB/s eta 0:00:14
   --------- ------------------------------ 1.8/8.1 MB 477.1 kB/s eta 0:00:14
   ---------- ----------------------------- 2.1/8.1 MB 336.5 kB/s eta 0:00:18
   ---------- ----------------------------- 2.1/8.1 MB 336.5 kB/s eta 0:00:18
   ----------- ---------------------------- 2.4/8.1 MB 362.7 kB/s eta 0:00:16
   -------------- ------------------------- 2.9/8.1 MB 432.4 kB/s eta 0:00:12
   --------------- ------------------------ 3.1/8.1 MB 464.8 kB/s eta 0:00:11
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   ------------------ --------------------- 3.7/8.1 MB 526.8 kB/s eta 0:00:09
   -------------------- ------------------- 4.2/8.1 MB 247.2 kB/s eta 0:00:16
   -------------------- ------------------- 4.2/8.1 MB 247.2 kB/s eta 0:00:16
   ---------------------- ----------------- 4.5/8.1 MB 256.6 kB/s eta 0:00:15
   ----------------------- ---------------- 4.7/8.1 MB 268.6 kB/s eta 0:00:13
   ------------------------ --------------- 5.0/8.1 MB 280.4 kB/s eta 0:00:12
   ------------------------- -------------- 5.2/8.1 MB 293.2 kB/s eta 0:00:10
   --------------------------- ------------ 5.5/8.1 MB 304.5 kB/s eta 0:00:09
   ----------------------------- ---------- 6.0/8.1 MB 329.6 kB/s eta 0:00:07
   ------------------------------- -------- 6.3/8.1 MB 342.4 kB/s eta 0:00:06
   ----------------------------------- ---- 7.1/8.1 MB 380.6 kB/s eta 0:00:03
   ------------------------------------- -- 7.6/8.1 MB 405.3 kB/s eta 0:00:02
   ---------------------------------------- 8.1/8.1 MB 427.7 kB/s eta 0:00:00
Using cached seaborn-0.13.2-py3-none-any.whl (294 kB)
Downloading contourpy-1.3.2-cp312-cp312-win_amd64.whl (223 kB)
Using cached cycler-0.12.1-py3-none-any.whl (8.3 kB)
Downloading fonttools-4.58.4-cp312-cp312-win_amd64.whl (2.2 MB)
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
   --------- ------------------------------ 0.5/2.2 MB 3.4 MB/s eta 0:00:01
   ----------------------- ---------------- 1.3/2.2 MB 3.4 MB/s eta 0:00:01
   ---------------------------- ----------- 1.6/2.2 MB 3.4 MB/s eta 0:00:01
   ---------------------------- ----------- 1.6/2.2 MB 3.4 MB/s eta 0:00:01
   ---------------------------- ----------- 1.6/2.2 MB 3.4 MB/s eta 0:00:01
   ---------------------------- ----------- 1.6/2.2 MB 3.4 MB/s eta 0:00:01
   ---------------------------- ----------- 1.6/2.2 MB 3.4 MB/s eta 0:00:01
   ---------------------------- ----------- 1.6/2.2 MB 3.4 MB/s eta 0:00:01
   ---------------------------- ----------- 1.6/2.2 MB 3.4 MB/s eta 0:00:01
   ---------------------------- ----------- 1.6/2.2 MB 3.4 MB/s eta 0:00:01
   ---------------------------- ----------- 1.6/2.2 MB 3.4 MB/s eta 0:00:01
   ---------------------------- ----------- 1.6/2.2 MB 3.4 MB/s eta 0:00:01
   ---------------------------- ----------- 1.6/2.2 MB 3.4 MB/s eta 0:00:01
   ---------------------------- ----------- 1.6/2.2 MB 3.4 MB/s eta 0:00:01
   -------------------------------- ------- 1.8/2.2 MB 521.5 kB/s eta 0:00:01
   ---------------------------------------- 2.2/2.2 MB 619.5 kB/s eta 0:00:00
Downloading kiwisolver-1.4.8-cp312-cp312-win_amd64.whl (71 kB)
Downloading pillow-11.2.1-cp312-cp312-win_amd64.whl (2.7 MB)
   ---------------------------------------- 0.0/2.7 MB ? eta -:--:--
   ------- -------------------------------- 0.5/2.7 MB 3.4 MB/s eta 0:00:01
   ----------------------- ---------------- 1.6/2.7 MB 4.0 MB/s eta 0:00:01
   ------------------------------- -------- 2.1/2.7 MB 3.8 MB/s eta 0:00:01
   ---------------------------------------- 2.7/2.7 MB 3.5 MB/s eta 0:00:00
Downloading pyparsing-3.2.3-py3-none-any.whl (111 kB)
Installing collected packages: pyparsing, pillow, kiwisolver, fonttools, cycler, contourpy, matplotlib, seaborn

   ---------------------------------------- 0/8 [pyparsing]
   ----- ---------------------------------- 1/8 [pillow]
   ----- ---------------------------------- 1/8 [pillow]
   ----- ---------------------------------- 1/8 [pillow]
   --------------- ------------------------ 3/8 [fonttools]
   --------------- ------------------------ 3/8 [fonttools]
   --------------- ------------------------ 3/8 [fonttools]
   --------------- ------------------------ 3/8 [fonttools]
   --------------- ------------------------ 3/8 [fonttools]
   --------------- ------------------------ 3/8 [fonttools]
   --------------- ------------------------ 3/8 [fonttools]
   --------------- ------------------------ 3/8 [fonttools]
   --------------- ------------------------ 3/8 [fonttools]
   --------------- ------------------------ 3/8 [fonttools]
   --------------- ------------------------ 3/8 [fonttools]
   -------------------- ------------------- 4/8 [cycler]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ------------------------------ --------- 6/8 [matplotlib]
   ----------------------------------- ---- 7/8 [seaborn]
   ----------------------------------- ---- 7/8 [seaborn]
   ---------------------------------------- 8/8 [seaborn]

Successfully installed contourpy-1.3.2 cycler-0.12.1 fonttools-4.58.4 kiwisolver-1.4.8 matplotlib-3.10.3 pillow-11.2.1 pyparsing-3.2.3 seaborn-0.13.2
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv("lung_cancer.csv")  
df.columns = df.columns.str.strip().str.upper().str.replace(" ", "_")
df.replace({'YES': 1, 'NO': 0, 'M': 1, 'F': 0}, inplace=True)
df.dropna(inplace=True)
X = df.drop(["RESULT", "NAME", "SURNAME"], axis=1)
y = df["RESULT"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
acc = accuracy_score(y_test, y_pred)
prec = report['1']['precision']
rec = report['1']['recall']
f1 = report['1']['f1-score']
print("Performance Points ")
print(f"Accuracy Point : {acc*100:.2f}")
print(f"Precision Point: {prec*100:.2f}")
print(f"Recall Point   : {rec*100:.2f}")
print(f"F1-score Point : {f1*100:.2f}")
plt.figure(figsize=(5, 3))
sns.heatmap(pd.crosstab(y_test, y_pred), annot=True, fmt="d", cmap="Greens")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
Performance Points 
Accuracy Point : 94.44
Precision Point: 100.00
Recall Point   : 85.71
F1-score Point : 92.31

png

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv("heart.csv")  
df.columns = df.columns.str.strip().str.upper().str.replace(" ", "_")
df.dropna(inplace=True)
X = df.drop("TARGET", axis=1)
y = df["TARGET"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
acc = accuracy_score(y_test, y_pred)
prec = report['1']['precision']
rec = report['1']['recall']
f1 = report['1']['f1-score']
print("Performance Points")
print(f"Accuracy Point : {acc*100:.2f}")
print(f"Precision Point: {prec*100:.2f}")
print(f"Recall Point   : {rec*100:.2f}")
print(f"F1-score Point : {f1*100:.2f}")
plt.figure(figsize=(5, 3))
sns.heatmap(pd.crosstab(y_test, y_pred), annot=True, fmt="d", cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
Performance Points
Accuracy Point : 82.42
Precision Point: 84.00
Recall Point   : 84.00
F1-score Point : 84.00

png

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv("diabetes.csv")
df.columns = df.columns.str.strip().str.upper().str.replace(" ", "_")
df.dropna(inplace=True)
X = df.drop("OUTCOME", axis=1)
y = df["OUTCOME"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
acc = accuracy_score(y_test, y_pred)
prec = report['1']['precision']
rec = report['1']['recall']
f1 = report['1']['f1-score']
print("Performance Points")
print(f"Accuracy Point : {acc*100:.2f}")
print(f"Precision Point: {prec*100:.2f}")
print(f"Recall Point   : {rec*100:.2f}")
print(f"F1-score Point : {f1*100:.2f}")
plt.figure(figsize=(5, 3))
sns.heatmap(pd.crosstab(y_test, y_pred), annot=True, fmt="d", cmap="Purples")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
Performance Points
Accuracy Point : 75.32
Precision Point: 63.86
Recall Point   : 66.25
F1-score Point : 65.03

png

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv("breast_cancer.csv")
df.columns = df.columns.str.strip().str.upper().str.replace(" ", "_")
df['DIAGNOSIS'] = df['DIAGNOSIS'].replace({'M': 1, 'B': 0})
df = df.infer_objects(copy=False)
df.dropna(inplace=True)
X = df.drop(["DIAGNOSIS", "ID"], axis=1, errors='ignore')
y = df["DIAGNOSIS"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
acc = accuracy_score(y_test, y_pred)
prec = report['1']['precision']
rec = report['1']['recall']
f1 = report['1']['f1-score']
print("Performance Points")
print(f"Accuracy Point : {acc*100:.2f}")
print(f"Precision Point: {prec*100:.2f}")
print(f"Recall Point   : {rec*100:.2f}")
print(f"F1-score Point : {f1*100:.2f}")
plt.figure(figsize=(5, 3))
sns.heatmap(pd.crosstab(y_test, y_pred), annot=True, fmt="d", cmap="Reds")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
Performance Points
Accuracy Point : 97.08
Precision Point: 98.33
Recall Point   : 93.65
F1-score Point : 95.93

png

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv("kidney_disease.csv")
df.columns = df.columns.str.strip().str.upper().str.replace(" ", "_")
df.replace({
    'yes': 1, 'no': 0,
    'normal': 0, 'abnormal': 1,
    'present': 1, 'notpresent': 0,
    'ckd': 1, 'notckd': 0,
    'CKD': 1, 'NOTCKD': 0,
    'good': 1, 'poor': 0
}, inplace=True)
df.drop(['ID'], axis=1, errors='ignore', inplace=True)
df.dropna(inplace=True)
df['CLASSIFICATION'] = df['CLASSIFICATION'].astype(int)
X = df.drop("CLASSIFICATION", axis=1)
y = df["CLASSIFICATION"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
acc = accuracy_score(y_test, y_pred)
prec = report['1']['precision']
rec = report['1']['recall']
f1 = report['1']['f1-score']
print("Performance Points")
print(f"Accuracy Point : {acc*100:.2f}")
print(f"Precision Point: {prec*100:.2f}")
print(f"Recall Point   : {rec*100:.2f}")
print(f"F1-score Point : {f1*100:.2f}")
plt.figure(figsize=(5, 3))
sns.heatmap(pd.crosstab(y_test, y_pred), annot=True, fmt="d", cmap="YlGn")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
Performance Points
Accuracy Point : 100.00
Precision Point: 100.00
Recall Point   : 100.00
F1-score Point : 100.00

png

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv("parkinsons.csv")
df.drop("name", axis=1, inplace=True)
X = df.drop("status", axis=1)   # status = 1 (Parkinson's), 0 (Healthy)
y = df["status"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
acc = accuracy_score(y_test, y_pred)
prec = report['1']['precision']
rec = report['1']['recall']
f1 = report['1']['f1-score']
print("Performance Points")
print(f"Accuracy Point : {acc*100:.2f}")
print(f"Precision Point: {prec*100:.2f}")
print(f"Recall Point   : {rec*100:.2f}")
print(f"F1-score Point : {f1*100:.2f}")
plt.figure(figsize=(5, 3))
sns.heatmap(pd.crosstab(y_test, y_pred), annot=True, fmt="d", cmap="Oranges")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
Performance Points
Accuracy Point : 93.22
Precision Point: 93.48
Recall Point   : 97.73
F1-score Point : 95.56

png

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv("COVID19_symptoms.csv")
df.columns = df.columns.str.strip().str.upper().str.replace(" ", "_")
df.replace({
    'YES': 1, 'NO': 0,
    'NONE': 0,
    'GOOD': 1, 'POOR': 0,
    'FEMALE': 0, 'MALE': 1, 'TRANSGENDER': 2,
    "DON'T-KNOW": 2
}, inplace=True)
df.drop(['COUNTRY'], axis=1, inplace=True)
df.dropna(inplace=True)
def encode_severity(row):
    if row['SEVERITY_NONE'] == 1:
        return 0
    elif row['SEVERITY_MILD'] == 1:
        return 1
    elif row['SEVERITY_MODERATE'] == 1:
        return 2
    elif row['SEVERITY_SEVERE'] == 1:
        return 3
    else:
        return -1  
df['SEVERITY_LABEL'] = df.apply(encode_severity, axis=1)
df.drop(['SEVERITY_NONE', 'SEVERITY_MILD', 'SEVERITY_MODERATE', 'SEVERITY_SEVERE'], axis=1, inplace=True)
X = df.drop("SEVERITY_LABEL", axis=1)
y = df["SEVERITY_LABEL"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
acc = accuracy_score(y_test, y_pred)

print("Performance Metrics")
print(f"Accuracy: {acc*100:.2f}%")
for cls in ["1", "2", "3"]:
    if cls in report:
        print(f"Class {cls}: Precision {report[cls]['precision']*100:.2f}%, Recall {report[cls]['recall']*100:.2f}%, F1 {report[cls]['f1-score']*100:.2f}%")
plt.figure(figsize=(5, 3))
sns.heatmap(pd.crosstab(y_test, y_pred), annot=True, fmt="d", cmap="Blues")
plt.xlabel("Predicted Severity")
plt.ylabel("Actual Severity")
plt.title("Confusion Matrix - COVID Severity")
plt.show()
Performance Metrics
Accuracy: 13.15%
Class 1: Precision 13.21%, Recall 12.83%, F1 13.02%
Class 2: Precision 13.19%, Recall 12.76%, F1 12.97%
Class 3: Precision 12.98%, Recall 13.40%, F1 13.18%

png

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv("survey.csv")
df = df[["Age", "Gender", "family_history", "benefits", "care_options", "seek_help", "mental_health_consequence"]]
df.dropna(inplace=True)
def clean_gender(g):
    g = g.lower()
    if "male" in g:
        return 0
    elif "female" in g:
        return 1
    else:
        return 2
df["Gender"] = df["Gender"].apply(clean_gender)
df.replace({
    "Yes": 1, "No": 0,
    "Don't know": 2, "Not sure": 2, "Maybe": 2,
    "Some of them": 1, "Not available": 0
}, inplace=True)
df["mental_health_consequence"] = df["mental_health_consequence"].replace({
    "Yes": 1,
    "No": 0,
    "Maybe": 1   # Treat "Maybe" as potential consequence
})
df["mental_health_consequence"] = df["mental_health_consequence"].astype(int)
X = df.drop("mental_health_consequence", axis=1)
y = df["mental_health_consequence"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
acc = accuracy_score(y_test, y_pred)
print("Mental Health Classification")
print(f"Accuracy       : {acc*100:.2f}%")
print(f"Precision (Yes): {report['1']['precision']*100:.2f}%")
print(f"Recall (Yes)   : {report['1']['recall']*100:.2f}%")
print(f"F1-score (Yes) : {report['1']['f1-score']*100:.2f}%")
plt.figure(figsize=(5, 3))
sns.heatmap(pd.crosstab(y_test, y_pred), annot=True, fmt="d", cmap="Oranges")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix - Mental Health Impact")
plt.show()
📊 Mental Health Classification
✅ Accuracy       : 35.71%
🎯 Precision (Yes): 34.29%
🔁 Recall (Yes)   : 23.08%
📌 F1-score (Yes) : 27.59%

png

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
df = pd.read_csv("stock_data.csv")
print("Columns in CSV:", df.columns.tolist())
date_col = [col for col in df.columns if 'date' in col.lower() or 'time' in col.lower()]
if date_col:
    df[date_col[0]] = pd.to_datetime(df[date_col[0]])
    df.set_index(date_col[0], inplace=True)
stock_col = df.select_dtypes(include='number').columns[0]
data = df[[stock_col]].dropna()
data.rename(columns={stock_col: "Close"}, inplace=True)
data["Target"] = data["Close"].shift(-1)
data.dropna(inplace=True)
X = data[["Close"]]
y = data["Target"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.2f}")
plt.figure(figsize=(10, 5))
plt.plot(y_test.index, y_test, label="Actual")
plt.plot(y_test.index, y_pred, label="Predicted")
plt.title(f"{stock_col} - Next Day Price Prediction")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.show()
Columns in CSV: ['Unnamed: 0', 'Stock_1', 'Stock_2', 'Stock_3', 'Stock_4', 'Stock_5']
RMSE: 0.98
R² Score: 0.96

png

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
df = pd.read_csv("stock_data.csv")
date_col = [col for col in df.columns if 'date' in col.lower() or 'time' in col.lower()]
if date_col:
    df[date_col[0]] = pd.to_datetime(df[date_col[0]])
    df.set_index(date_col[0], inplace=True)
stock_col = df.select_dtypes(include='number').columns[0]
df = df[[stock_col]].rename(columns={stock_col: "Close"})
df["MA_5"] = df["Close"].rolling(window=5).mean()
df["MA_10"] = df["Close"].rolling(window=10).mean()
df["Daily_Return"] = df["Close"].pct_change()
df["Target"] = df["Close"].shift(-1)  # Predict next day's price
df.dropna(inplace=True)
X = df[["Close", "MA_5", "MA_10", "Daily_Return"]]
y = df["Target"]
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print(f" RMSE: {rmse:.2f}")
print(f" R² Score: {r2:.2f}")
plt.figure(figsize=(12, 6))
plt.plot(y_test.index, y_test, label="Actual", linewidth=2)
plt.plot(y_test.index, y_pred, label="Predicted", linestyle='--')
plt.title("Stock Price Prediction with Moving Averages")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.tight_layout()
plt.show()
 RMSE: 2.13
 R² Score: 0.79

png

score = 0
import pandas as pd

data = {
    'city': ['Toronto', 'Montreal', 'Waterloo'],
    'points': [80, 70, 90]
}
df = pd.DataFrame(data)
score += 40  
df['code'] = [1, 2, 3]
score += 40 
df['points'] = df['points'] + 10
score += 40  
from datetime import datetime
def get_age(d):
    d1 = datetime.now()
    months = (d1.year - d.year) * 12 + d1.month - d.month
    year = int(months / 12)
    return year
age = get_age(datetime(1991, 1, 1))
score += 40  # Function logic used
df['status'] = df['points'].apply(lambda x: 'Pass' if x >= 90 else 'Fail')
score += 40  
print("Final Score:", score)
print(df)
Final Score: 200
       city  points  code status
0   Toronto      90     1   Pass
1  Montreal      80     2   Fail
2  Waterloo     100     3   Pass
import pandas as pd
data = {
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'marks': [85, 67, 90, 74]
}
df = pd.DataFrame(data)
def get_grade(mark):
    if mark >= 90:
        return 'A'
    elif mark >= 80:
        return 'B'
    elif mark >= 70:
        return 'C'
    elif mark >= 60:
        return 'D'
    else:
        return 'F'
df['grade'] = df['marks'].apply(get_grade)
average_mark = df['marks'].mean()
df['result'] = df['marks'].apply(lambda x: 'Pass' if x >= 60 else 'Fail')
print(df)
print("Average Mark:", average_mark)
      name  marks grade result
0    Alice     85     B   Pass
1      Bob     67     D   Pass
2  Charlie     90     A   Pass
3    David     74     C   Pass
Average Mark: 79.0
import pandas as pd
from io import StringIO
from datetime import datetime
csv_data = StringIO("""
name,join_date,salary,position
Alice,2016-08-01,95000,Manager
Bob,2019-07-15,60000,Engineer
Charlie,2014-01-10,120000,Director
David,2021-03-20,45000,Intern
Ella,2018-09-30,70000,Engineer
""")
df = pd.read_csv(csv_data, parse_dates=['join_date'])
today = datetime.today()
df['experience_years'] = df['join_date'].apply(lambda d: (today - d).days // 365)
df['salary_after_tax'] = df['salary'].apply(lambda x: x * 0.82)
def get_level(exp, salary):
    if exp >= 8 and salary > 100000:
        return 'Senior Executive'
    elif exp >= 5:
        return 'Experienced'
    elif exp >= 2:
        return 'Mid-Level'
    else:
        return 'Junior'
df['level'] = df.apply(lambda row: get_level(row['experience_years'], row['salary']), axis=1)
high_performers = df[(df['salary'] > 70000) & (df['experience_years'] > 3)]
print("All Employees:\n", df, "\n")
print("High Performers:\n", high_performers)
All Employees:
       name  join_date  salary  position  experience_years  salary_after_tax  \
0    Alice 2016-08-01   95000   Manager                 8           77900.0   
1      Bob 2019-07-15   60000  Engineer                 5           49200.0   
2  Charlie 2014-01-10  120000  Director                11           98400.0   
3    David 2021-03-20   45000    Intern                 4           36900.0   
4     Ella 2018-09-30   70000  Engineer                 6           57400.0

              level  
0       Experienced  
1       Experienced  
2  Senior Executive  
3         Mid-Level  
4       Experienced

High Performers:
       name  join_date  salary  position  experience_years  salary_after_tax  \
0    Alice 2016-08-01   95000   Manager                 8           77900.0   
2  Charlie 2014-01-10  120000  Director                11           98400.0

              level  
0       Experienced  
2  Senior Executive
import pandas as pd
from datetime import datetime
employees = pd.DataFrame({
    'emp_id': [101, 102, 103, 104, 105, 106],
    'name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank'],
    'department': ['Engineering', 'Engineering', 'HR', 'Finance', 'Finance', 'Engineering'],
    'join_date': pd.to_datetime(['2015-05-21', '2018-03-15', '2012-06-30', '2019-01-01', '2017-11-23', '2022-05-19']),
    'salary': [90000, 72000, 60000, 65000, 58000, 50000]
})
projects = pd.DataFrame({
    'project_id': [1, 2, 3, 4],
    'project_name': ['Alpha', 'Beta', 'Gamma', 'Delta'],
    'assigned_to': [[101, 102], [104], [103, 105], [101, 106]],
    'deadline': pd.to_datetime(['2025-12-01', '2024-11-01', '2025-03-15', '2025-08-30'])
})
project_long = projects.explode('assigned_to').rename(columns={'assigned_to': 'emp_id'})
merged = pd.merge(project_long, employees, on='emp_id', how='left')
today = pd.to_datetime(datetime.today().date())
merged['experience_yrs'] = (today - merged['join_date']).dt.days // 365
workload = merged.groupby('emp_id').size().reset_index(name='project_count')
merged = pd.merge(merged, workload, on='emp_id', how='left')
import numpy as np
conditions = [
    (merged['project_count'] >= 3),
    (merged['project_count'] == 2),
    (merged['project_count'] == 1)
]
choices = ['Overloaded', 'Balanced', 'Light']
merged['workload_status'] = np.select(conditions, choices, default='Unassigned')
department_summary = merged.groupby('department').agg(
    total_employees=('emp_id', 'nunique'),
    avg_salary=('salary', 'mean'),
    total_projects=('project_id', 'nunique'),
    avg_experience=('experience_yrs', 'mean')
).reset_index()
top_employees = merged.sort_values('experience_yrs', ascending=False).drop_duplicates('department')
print("\nFull Merged Data (Project Assignments + Employees):")
print(merged[['emp_id', 'name', 'department', 'project_name', 'workload_status', 'experience_yrs']])
print("\nDepartment Summary:")
print(department_summary)
print("\nTop Experienced Employee per Department:")
print(top_employees[['department', 'name', 'experience_yrs', 'salary']])
Full Merged Data (Project Assignments + Employees):
  emp_id     name   department project_name workload_status  experience_yrs
0    101    Alice  Engineering        Alpha        Balanced              10
1    102      Bob  Engineering        Alpha           Light               7
2    104    David      Finance         Beta           Light               6
3    103  Charlie           HR        Gamma           Light              13
4    105      Eva      Finance        Gamma           Light               7
5    101    Alice  Engineering        Delta        Balanced              10
6    106    Frank  Engineering        Delta           Light               3

Department Summary:
    department  total_employees  avg_salary  total_projects  avg_experience
0  Engineering                3     75500.0               2             7.5
1      Finance                2     61500.0               2             6.5
2           HR                1     60000.0               1            13.0

Top Experienced Employee per Department:
    department     name  experience_yrs  salary
3           HR  Charlie              13   60000
5  Engineering    Alice              10   90000
4      Finance      Eva               7   58000
import pandas as pd
import numpy as np
from datetime import datetime

# Employee DataFrame
df_emp = pd.DataFrame({
    'emp_id': range(1001, 1031),
    'name': [f'Emp{i}' for i in range(1, 31)],
    'department': np.random.choice(['HR', 'Finance', 'Engineering', 'Sales'], 30),
    'join_date': pd.date_range(start='2010-01-01', periods=30, freq='180D'),
    'salary': np.random.randint(50000, 120000, 30),
    'monthly_sales': np.random.randint(3000, 10000, 30)
})

# Project DataFrame
df_proj = pd.DataFrame({
    'project_id': range(201, 211),
    'project_name': [f'Proj{i}' for i in range(1, 11)],
    'assigned_to': [list(np.random.choice(df_emp['emp_id'], size=np.random.randint(2, 6), replace=False)) for _ in range(10)],
    'deadline': pd.date_range(start='2025-01-01', periods=10, freq='30D')
})

# Enrichment
today = pd.to_datetime(datetime.today().date())
df_emp['experience_years'] = (today - df_emp['join_date']).dt.days // 365
df_emp['tax'] = df_emp['salary'] * 0.18
df_emp['net_salary'] = df_emp['salary'] - df_emp['tax']

# Bonus and performance
def calc_bonus(s): return 0.1 if s >= 9000 else 0.08 if s >= 7000 else 0.05 if s >= 5000 else 0.03
df_emp['bonus_percent'] = df_emp['monthly_sales'].apply(calc_bonus)
df_emp['monthly_bonus'] = df_emp['salary'] * df_emp['bonus_percent']

def perf(s): return 'Excellent' if s >= 9500 else 'Good' if s >= 7000 else 'Average' if s >= 5000 else 'Low'
df_emp['performance'] = df_emp['monthly_sales'].apply(perf)

df_emp['annual_total'] = (df_emp['net_salary'] + df_emp['monthly_bonus']) * 12

def grade(row):
    if row['performance'] == 'Excellent' and row['experience_years'] > 5: return 'A+'
    if row['performance'] == 'Good': return 'A'
    if row['performance'] == 'Average': return 'B'
    return 'C'
df_emp['grade'] = df_emp.apply(grade, axis=1)

df_summary = df_emp.groupby('department').agg(
    avg_salary=('salary', 'mean'),
    max_bonus=('monthly_bonus', 'max'),
    avg_exp=('experience_years', 'mean'),
    perf_score=('performance', lambda x: (x == 'Excellent').sum())
).reset_index()

df_emp['exp_category'] = pd.cut(df_emp['experience_years'], [0, 3, 6, 10, 20], labels=['Junior', 'Mid', 'Senior', 'Veteran'])
df_emp['promotion_eligible'] = (df_emp['experience_years'] >= 5) & (df_emp['performance'].isin(['Excellent', 'Good']))
df_emp['tax_saved'] = df_emp['monthly_bonus'] * 0.3

df_masked = df_emp.copy()
df_masked['tax'] = '****'

df_emp['salary_lakh'] = df_emp['salary'] / 1e5
df_emp['net_salary_lakh'] = df_emp['net_salary'] / 1e5

df_rank = df_emp.groupby('department')['monthly_sales'].mean().rank(ascending=False).astype(int).reset_index()
df_rank.columns = ['department', 'dept_rank']
df_emp = df_emp.merge(df_rank, on='department', how='left')

df_emp['salary_bucket'] = pd.cut(df_emp['salary'], [0, 60000, 80000, 100000, 150000], labels=['<60K', '60-80K', '80-100K', '100K+'])

df_emp['new_salary'] = np.where(df_emp['promotion_eligible'], df_emp['salary'] * 1.1, df_emp['salary'])
score_map = {'Excellent': 3, 'Good': 2, 'Average': 1, 'Low': 0}
df_emp['perf_score'] = df_emp['performance'].map(score_map)
df_emp['sales_z'] = (df_emp['monthly_sales'] - df_emp['monthly_sales'].mean()) / df_emp['monthly_sales'].std()

project_exp = df_proj.explode('assigned_to').rename(columns={'assigned_to': 'emp_id'})
df_merged = project_exp.merge(df_emp, on='emp_id', how='left')

df_merged['days_to_deadline'] = (df_merged['deadline'] - today).dt.days
df_merged['proj_bonus_share'] = df_merged['monthly_bonus'] / df_merged.groupby('project_id')['monthly_bonus'].transform('sum')

df_dept_perf = df_merged.groupby(['department', 'project_name']).agg(
    avg_perf_score=('perf_score', 'mean'),
    total_proj_bonus=('proj_bonus_share', 'sum')
).reset_index()

df_eng = df_emp[df_emp['department'] == 'Engineering'].copy()
df_fin = df_emp[df_emp['department'] == 'Finance'].copy()
df_hr = df_emp[df_emp['department'] == 'HR'].copy()
df_sales = df_emp[df_emp['department'] == 'Sales'].copy()

df_eng.loc[:, 'bench_status'] = np.where(df_eng['monthly_sales'] < 4000, 'Bench', 'Active')
df_fin.loc[:, 'risk'] = np.where(df_fin['experience_years'] < 2, 'High', 'Normal')

df_combined = pd.concat([df_eng, df_fin, df_hr, df_sales])
df_combined['normalized_bonus'] = (df_combined['monthly_bonus'] - df_combined['monthly_bonus'].min()) / (df_combined['monthly_bonus'].max() - df_combined['monthly_bonus'].min())

df_top_perf = df_emp[df_emp['performance'] == 'Excellent'].sort_values('monthly_sales', ascending=False).head(10)
df_low_perf = df_emp[df_emp['performance'] == 'Low'].sort_values('monthly_sales').head(10)

df_proj_perf = df_merged.groupby('project_id').agg(
    avg_sales=('monthly_sales', 'mean'),
    total_employees=('emp_id', 'nunique')
).reset_index()

df_emp['salary_growth'] = df_emp['new_salary'] - df_emp['salary']
df_emp['effective_tax_rate'] = df_emp['tax'] / df_emp['salary']
df_emp['bonus_efficiency'] = df_emp['monthly_bonus'] / df_emp['monthly_sales']
df_emp['net_to_gross_ratio'] = df_emp['net_salary'] / df_emp['salary']

df_perf_deviation = df_emp[['emp_id', 'monthly_sales', 'sales_z']].sort_values('sales_z', ascending=False)
df_bonus_outliers = df_emp[df_emp['monthly_bonus'] > df_emp['monthly_bonus'].mean() + 2 * df_emp['monthly_bonus'].std()]
df_tax_outliers = df_emp[df_emp['tax'] > df_emp['tax'].mean() + 2 * df_emp['tax'].std()]
df_exp_leaders = df_emp.sort_values('experience_years', ascending=False).head(5)
df_newcomers = df_emp.sort_values('join_date', ascending=False).head(5)
df_veterans = df_emp[df_emp['experience_years'] >= 10]

df_emp['efficiency_score'] = df_emp['net_salary'] / (1 + df_emp['experience_years']) * df_emp['bonus_percent']
df_emp['team_fit_score'] = np.where(df_emp['grade'].isin(['A+', 'A']), 1, 0.5)

df_pivot_perf = pd.pivot_table(df_emp, index='department', columns='grade', values='salary', aggfunc='mean').fillna(0)
df_pivot_bonus = pd.pivot_table(df_emp, index='exp_category', columns='performance', values='monthly_bonus', aggfunc='mean', observed=False).fillna(0)

df_proj_long = df_proj.explode('assigned_to').rename(columns={'assigned_to': 'emp_id'})
df_proj_long = df_proj_long.merge(df_emp[['emp_id', 'salary']], on='emp_id', how='left')
df_proj_long['share_salary'] = df_proj_long['salary'] / df_proj_long.groupby('project_id')['salary'].transform('sum')

df_final_export = df_emp[['emp_id', 'name', 'department', 'grade', 'performance', 'monthly_bonus', 'new_salary', 'promotion_eligible']]
df_export_summary = df_emp.groupby('department')[['salary', 'monthly_bonus', 'net_salary']].mean().reset_index()

df_stat = df_emp.describe()
df_exp_group = df_emp.groupby('exp_category', observed=False)[['salary', 'monthly_bonus']].mean().reset_index()
df_ranked = df_emp.sort_values(['perf_score', 'experience_years'], ascending=[False, False])
df_department_max_bonus = df_emp.groupby('department')['monthly_bonus'].max().reset_index()
df_top10_salary = df_emp.sort_values('salary', ascending=False).head(10)
df_bottom10_salary = df_emp.sort_values('salary').head(10)

df_emp['relative_perf'] = df_emp['perf_score'] / df_emp['experience_years'].replace(0, 1)

df_proj_assignments = df_proj_long.groupby('emp_id').agg(total_projects=('project_id', 'count')).reset_index()
df_merged_final = df_emp.merge(df_proj_assignments, on='emp_id', how='left')
df_merged_final['total_projects'] = df_merged_final['total_projects'].fillna(0).astype(int)

df_emp['loyalty_index'] = df_emp['experience_years'] / df_emp['department'].map(df_emp.groupby('department')['experience_years'].mean())
print("\nFinal Employee Data Sample:")
print(df_emp.head())

print("\nDepartment Summary:")
print(df_summary)

print("\nTop 5 Performers:")
print(df_top_perf[['emp_id', 'name', 'monthly_sales', 'performance']])

print("\nNewcomers (Recently Joined):")
print(df_newcomers[['emp_id', 'name', 'join_date']])

print("\nMax Monthly Bonus by Department:")
print(df_department_max_bonus)

print("\nEmployee Stats Description:")
print(df_stat)

print("\nBottom 5 Performers (Low):")
print(df_low_perf[['emp_id', 'name', 'monthly_sales', 'performance']])

print("\nProject Performance Summary:")
print(df_proj_perf)

print("\nExperience Group Summary:")
print(df_exp_group)
Final Employee Data Sample:
   emp_id  name   department  join_date  salary  monthly_sales  \
0    1001  Emp1        Sales 2010-01-01   79408           9961   
1    1002  Emp2           HR 2010-06-30  110847           8478   
2    1003  Emp3      Finance 2010-12-27   55721           9216   
3    1004  Emp4           HR 2011-06-25   84660           8227   
4    1005  Emp5  Engineering 2011-12-22  101856           6356

   experience_years       tax  net_salary  bonus_percent  ...  perf_score  \
0                15  14293.44    65114.56           0.10  ...           3   
1                15  19952.46    90894.54           0.08  ...           2   
2                14  10029.78    45691.22           0.10  ...           2   
3                14  15238.80    69421.20           0.08  ...           2   
4                13  18334.08    83521.92           0.05  ...           1

    sales_z  salary_growth effective_tax_rate bonus_efficiency  \
0  2.041904         7940.8               0.18         0.797189   
1  1.268971        11084.7               0.18         1.045973   
2  1.653613         5572.1               0.18         0.604612   
3  1.138151         8466.0               0.18         0.823241   
4  0.162995            0.0               0.18         0.801259

   net_to_gross_ratio  efficiency_score  team_fit_score  relative_perf  \
0                0.82        406.966000             1.0       0.200000   
1                0.82        454.472700             1.0       0.133333   
2                0.82        304.608133             1.0       0.142857   
3                0.82        370.246400             1.0       0.142857   
4                0.82        298.292571             0.5       0.076923

   loyalty_index  
0       1.849315  
1       1.849315  
2       1.666667  
3       1.726027  
4       1.750000

[5 rows x 32 columns]

Department Summary:
    department    avg_salary  max_bonus   avg_exp  perf_score
0  Engineering  83046.857143    7752.80  7.428571           2
1      Finance  72508.000000    5572.10  8.400000           0
2           HR  86317.555556    8867.76  8.111111           0
3        Sales  86086.222222    7940.80  8.111111           1

Top 5 Performers:
    emp_id   name  monthly_sales performance
0     1001   Emp1           9961   Excellent
10    1011  Emp11           9908   Excellent
14    1015  Emp15           9589   Excellent

Newcomers (Recently Joined):
    emp_id   name  join_date
29    1030  Emp30 2024-04-17
28    1029  Emp29 2023-10-20
27    1028  Emp28 2023-04-23
26    1027  Emp27 2022-10-25
25    1026  Emp26 2022-04-28

Max Monthly Bonus by Department:
    department  monthly_bonus
0  Engineering        7752.80
1      Finance        5572.10
2           HR        8867.76
3        Sales        7940.80

Employee Stats Description:
            emp_id            join_date         salary  monthly_sales  \
count    30.000000                   30      30.000000      30.000000   
mean   1015.500000  2017-02-23 00:00:00   83183.400000    6043.266667   
min    1001.000000  2010-01-01 00:00:00   54449.000000    3577.000000   
25%    1008.250000  2013-07-29 00:00:00   69237.750000    4554.500000   
50%    1015.500000  2017-02-23 00:00:00   84854.500000    5897.000000   
75%    1022.750000  2020-09-20 00:00:00   93228.500000    6696.250000   
max    1030.000000  2024-04-17 00:00:00  113569.000000    9961.000000   
std       8.803408                  NaN   17923.305573    1918.667155

       experience_years           tax   net_salary  bonus_percent  \
count         30.000000     30.000000     30.00000      30.000000   
mean           8.000000  14973.012000  68210.38800       0.052333   
min            1.000000   9800.820000  44648.18000       0.030000   
25%            4.250000  12462.795000  56774.95500       0.030000   
50%            8.000000  15273.810000  69580.69000       0.050000   
75%           11.750000  16781.130000  76447.37000       0.050000   
max           15.000000  20442.420000  93126.58000       0.100000   
std            4.394354   3226.195003  14697.11057       0.024167

       monthly_bonus  annual_total  ...  dept_rank     new_salary  perf_score  \
count      30.000000  3.000000e+01  ...  30.000000      30.000000   30.000000   
mean     4320.721667  8.703733e+05  ...   2.666667   84784.186667    0.966667   
min      1735.470000  5.684476e+05  ...   1.000000   54449.000000    0.000000   
25%      2751.517500  7.062251e+05  ...   2.000000   69237.750000    0.000000   
50%      3554.600000  9.011198e+05  ...   3.000000   87755.400000    1.000000   
75%      5556.537500  9.544479e+05  ...   4.000000   93289.500000    1.000000   
max      8867.760000  1.197148e+06  ...   4.000000  121931.700000    3.000000   
std      2061.872362  1.872966e+05  ...   1.154701   18255.231495    0.964305

            sales_z  salary_growth  effective_tax_rate  bonus_efficiency  \
count  3.000000e+01      30.000000        3.000000e+01         30.000000   
mean   1.202742e-16    1600.786667        1.800000e-01          0.695374   
min   -1.285406e+00       0.000000        1.800000e-01          0.427559   
25%   -7.759380e-01       0.000000        1.800000e-01          0.539667   
50%   -7.623348e-02       0.000000        1.800000e-01          0.672981   
75%    3.403317e-01       0.000000        1.800000e-01          0.800241   
max    2.041904e+00   11084.700000        1.800000e-01          1.056824   
std    1.000000e+00    3341.600041        2.775558e-17          0.188540

       net_to_gross_ratio  efficiency_score  team_fit_score  
count        3.000000e+01         30.000000       30.000000  
mean         8.200000e-01        521.664180        0.616667  
min          8.200000e-01        166.211071        0.500000  
25%          8.200000e-01        255.058950        0.500000  
50%          8.200000e-01        379.230183        0.500000  
75%          8.200000e-01        587.154850        0.500000  
max          8.200000e-01       2259.038500        1.000000  
std          8.246530e-17        451.629510        0.215092

[8 rows x 23 columns]

Bottom 5 Performers (Low):
    emp_id   name  monthly_sales performance
24    1025  Emp25           3577         Low
20    1021  Emp21           3725         Low
7     1008   Emp8           3733         Low
18    1019  Emp19           3910         Low
26    1027  Emp27           4010         Low
19    1020  Emp20           4212         Low
6     1007   Emp7           4291         Low
15    1016  Emp16           4485         Low
27    1028  Emp28           4763         Low
16    1017  Emp17           4794         Low

Project Performance Summary:
   project_id    avg_sales  total_employees
0         201  6093.500000                4
1         202  5978.333333                3
2         203  7732.750000                4
3         204  7503.000000                2
4         205  6583.000000                3
5         206  5374.000000                4
6         207  6820.000000                5
7         208  5728.333333                3
8         209  4523.750000                4
9         210  6219.333333                3

Experience Group Summary:
  exp_category        salary  monthly_bonus
0       Junior  81796.333333     3771.06500
1          Mid  86197.333333     3235.45000
2       Senior  78405.875000     4454.57375
3      Veteran  86029.300000     5194.59700


Score: 35

Category: basics