{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "animated-cookbook",
   "metadata": {},
   "outputs": [],
   "source": [
    "import polars as pl"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "assisted-channels",
   "metadata": {
    "tags": []
   },
   "source": [
    "# 表达式\n",
    "\n",
    "`fn(Series) -> Series`\n",
    "\n",
    "* 懒惰评估\n",
    "    - 可以被优化\n",
    "    - 向库提供背景信息，并做出明智的决定\n",
    "* 高度并行\n",
    "* 上下文相关\n",
    "    - 选择 / 投影 -> `Series` = **列、文本或值**\n",
    "    - 聚合 -> `Series` = **组**\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "printable-month",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>A</th><th>fruits</th><th>B</th><th>cars</th><th>optional</th></tr><tr><td>i64</td><td>str</td><td>i64</td><td>str</td><td>i64</td></tr></thead><tbody><tr><td>1</td><td>&quot;banana&quot;</td><td>5</td><td>&quot;beetle&quot;</td><td>28</td></tr><tr><td>2</td><td>&quot;banana&quot;</td><td>4</td><td>&quot;audi&quot;</td><td>300</td></tr><tr><td>3</td><td>&quot;apple&quot;</td><td>3</td><td>&quot;beetle&quot;</td><td>null</td></tr><tr><td>4</td><td>&quot;apple&quot;</td><td>2</td><td>&quot;beetle&quot;</td><td>2</td></tr><tr><td>5</td><td>&quot;banana&quot;</td><td>1</td><td>&quot;beetle&quot;</td><td>-30</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 5)\n",
       "┌─────┬────────┬─────┬────────┬──────────┐\n",
       "│ A   ┆ fruits ┆ B   ┆ cars   ┆ optional │\n",
       "│ --- ┆ ---    ┆ --- ┆ ---    ┆ ---      │\n",
       "│ i64 ┆ str    ┆ i64 ┆ str    ┆ i64      │\n",
       "╞═════╪════════╪═════╪════════╪══════════╡\n",
       "│ 1   ┆ banana ┆ 5   ┆ beetle ┆ 28       │\n",
       "│ 2   ┆ banana ┆ 4   ┆ audi   ┆ 300      │\n",
       "│ 3   ┆ apple  ┆ 3   ┆ beetle ┆ null     │\n",
       "│ 4   ┆ apple  ┆ 2   ┆ beetle ┆ 2        │\n",
       "│ 5   ┆ banana ┆ 1   ┆ beetle ┆ -30      │\n",
       "└─────┴────────┴─────┴────────┴──────────┘"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pl.DataFrame(\n",
    "    {\n",
    "        \"A\": [1, 2, 3, 4, 5],\n",
    "        \"fruits\": [\"banana\", \"banana\", \"apple\", \"apple\", \"banana\"],\n",
    "        \"B\": [5, 4, 3, 2, 1],\n",
    "        \"cars\": [\"beetle\", \"audi\", \"beetle\", \"beetle\", \"beetle\"],\n",
    "        \"optional\": [28, 300, None, 2, -30],\n",
    "    }\n",
    ")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "collectible-jewelry",
   "metadata": {},
   "source": [
    "# 选择上下文"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "minus-mounting",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 4)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>A</th><th>B</th><th>literal</th><th>fruits</th></tr><tr><td>i64</td><td>i64</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>1</td><td>5</td><td>&quot;B&quot;</td><td>&quot;banana&quot;</td></tr><tr><td>2</td><td>4</td><td>&quot;B&quot;</td><td>&quot;banana&quot;</td></tr><tr><td>3</td><td>3</td><td>&quot;B&quot;</td><td>&quot;apple&quot;</td></tr><tr><td>4</td><td>2</td><td>&quot;B&quot;</td><td>&quot;apple&quot;</td></tr><tr><td>5</td><td>1</td><td>&quot;B&quot;</td><td>&quot;banana&quot;</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 4)\n",
       "┌─────┬─────┬─────────┬────────┐\n",
       "│ A   ┆ B   ┆ literal ┆ fruits │\n",
       "│ --- ┆ --- ┆ ---     ┆ ---    │\n",
       "│ i64 ┆ i64 ┆ str     ┆ str    │\n",
       "╞═════╪═════╪═════════╪════════╡\n",
       "│ 1   ┆ 5   ┆ B       ┆ banana │\n",
       "│ 2   ┆ 4   ┆ B       ┆ banana │\n",
       "│ 3   ┆ 3   ┆ B       ┆ apple  │\n",
       "│ 4   ┆ 2   ┆ B       ┆ apple  │\n",
       "│ 5   ┆ 1   ┆ B       ┆ banana │\n",
       "└─────┴─────┴─────────┴────────┘"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 我们可以通过名称来选择\n",
    "\n",
    "(df.select([\n",
    "    pl.col(\"A\"),\n",
    "    \"B\",      # \"B\"这一列将会被推理\n",
    "    pl.lit(\"B\"),  # 我们必须告诉Polars, 我们\"B\"的文字信息\n",
    "    pl.col(\"fruits\"),\n",
    "]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "valued-civilization",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (1, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>A</th><th>B</th></tr><tr><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>15</td><td>15</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (1, 2)\n",
       "┌─────┬─────┐\n",
       "│ A   ┆ B   │\n",
       "│ --- ┆ --- │\n",
       "│ i64 ┆ i64 │\n",
       "╞═════╪═════╡\n",
       "│ 15  ┆ 15  │\n",
       "└─────┴─────┘"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 可以通过正则表达式（起始为‘^’终止为'$'）来选择列\n",
    "\n",
    "(df.select([\n",
    "    pl.col(\"^A|B$\").sum() # 求和\n",
    "]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "experimental-mobility",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (1, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>A</th><th>B</th></tr><tr><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>15</td><td>15</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (1, 2)\n",
       "┌─────┬─────┐\n",
       "│ A   ┆ B   │\n",
       "│ --- ┆ --- │\n",
       "│ i64 ┆ i64 │\n",
       "╞═════╪═════╡\n",
       "│ 15  ┆ 15  │\n",
       "└─────┴─────┘"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 可以通过名称选取多个列\n",
    "\n",
    "(df.select([\n",
    "    pl.col([\"A\", \"B\"]).sum() # 求和\n",
    "]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "regional-spencer",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 10)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>A</th><th>fruits</th><th>B</th><th>cars</th><th>optional</th><th>A_reverse</th><th>fruits_reverse</th><th>B_reverse</th><th>cars_reverse</th><th>optional_reverse</th></tr><tr><td>i64</td><td>str</td><td>i64</td><td>str</td><td>i64</td><td>i64</td><td>str</td><td>i64</td><td>str</td><td>i64</td></tr></thead><tbody><tr><td>1</td><td>&quot;banana&quot;</td><td>5</td><td>&quot;beetle&quot;</td><td>28</td><td>5</td><td>&quot;banana&quot;</td><td>1</td><td>&quot;beetle&quot;</td><td>-30</td></tr><tr><td>2</td><td>&quot;banana&quot;</td><td>4</td><td>&quot;audi&quot;</td><td>300</td><td>4</td><td>&quot;apple&quot;</td><td>2</td><td>&quot;beetle&quot;</td><td>2</td></tr><tr><td>3</td><td>&quot;apple&quot;</td><td>3</td><td>&quot;beetle&quot;</td><td>null</td><td>3</td><td>&quot;apple&quot;</td><td>3</td><td>&quot;beetle&quot;</td><td>null</td></tr><tr><td>4</td><td>&quot;apple&quot;</td><td>2</td><td>&quot;beetle&quot;</td><td>2</td><td>2</td><td>&quot;banana&quot;</td><td>4</td><td>&quot;audi&quot;</td><td>300</td></tr><tr><td>5</td><td>&quot;banana&quot;</td><td>1</td><td>&quot;beetle&quot;</td><td>-30</td><td>1</td><td>&quot;banana&quot;</td><td>5</td><td>&quot;beetle&quot;</td><td>28</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 10)\n",
       "┌─────┬────────┬─────┬────────┬───┬────────────────┬───────────┬──────────────┬──────────────────┐\n",
       "│ A   ┆ fruits ┆ B   ┆ cars   ┆ … ┆ fruits_reverse ┆ B_reverse ┆ cars_reverse ┆ optional_reverse │\n",
       "│ --- ┆ ---    ┆ --- ┆ ---    ┆   ┆ ---            ┆ ---       ┆ ---          ┆ ---              │\n",
       "│ i64 ┆ str    ┆ i64 ┆ str    ┆   ┆ str            ┆ i64       ┆ str          ┆ i64              │\n",
       "╞═════╪════════╪═════╪════════╪═══╪════════════════╪═══════════╪══════════════╪══════════════════╡\n",
       "│ 1   ┆ banana ┆ 5   ┆ beetle ┆ … ┆ banana         ┆ 1         ┆ beetle       ┆ -30              │\n",
       "│ 2   ┆ banana ┆ 4   ┆ audi   ┆ … ┆ apple          ┆ 2         ┆ beetle       ┆ 2                │\n",
       "│ 3   ┆ apple  ┆ 3   ┆ beetle ┆ … ┆ apple          ┆ 3         ┆ beetle       ┆ null             │\n",
       "│ 4   ┆ apple  ┆ 2   ┆ beetle ┆ … ┆ banana         ┆ 4         ┆ audi         ┆ 300              │\n",
       "│ 5   ┆ banana ┆ 1   ┆ beetle ┆ … ┆ banana         ┆ 5         ┆ beetle       ┆ 28               │\n",
       "└─────┴────────┴─────┴────────┴───┴────────────────┴───────────┴──────────────┴──────────────────┘"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 我们可以以正常顺序选择所有事物\n",
    "# 然后我们可以以逆序的方式选择事物\n",
    "\n",
    "(df.select([\n",
    "    pl.all(),\n",
    "    pl.all().reverse().suffix(\"_reverse\") # 表格中所有列逆序（并且文字中后缀为_reverse\n",
    "]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "adopted-waterproof",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 10)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>A</th><th>fruits</th><th>B</th><th>cars</th><th>optional</th><th>A_sum</th><th>fruits_sum</th><th>B_sum</th><th>cars_sum</th><th>optional_sum</th></tr><tr><td>i64</td><td>str</td><td>i64</td><td>str</td><td>i64</td><td>i64</td><td>str</td><td>i64</td><td>str</td><td>i64</td></tr></thead><tbody><tr><td>1</td><td>&quot;banana&quot;</td><td>5</td><td>&quot;beetle&quot;</td><td>28</td><td>15</td><td>null</td><td>15</td><td>null</td><td>300</td></tr><tr><td>2</td><td>&quot;banana&quot;</td><td>4</td><td>&quot;audi&quot;</td><td>300</td><td>15</td><td>null</td><td>15</td><td>null</td><td>300</td></tr><tr><td>3</td><td>&quot;apple&quot;</td><td>3</td><td>&quot;beetle&quot;</td><td>null</td><td>15</td><td>null</td><td>15</td><td>null</td><td>300</td></tr><tr><td>4</td><td>&quot;apple&quot;</td><td>2</td><td>&quot;beetle&quot;</td><td>2</td><td>15</td><td>null</td><td>15</td><td>null</td><td>300</td></tr><tr><td>5</td><td>&quot;banana&quot;</td><td>1</td><td>&quot;beetle&quot;</td><td>-30</td><td>15</td><td>null</td><td>15</td><td>null</td><td>300</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 10)\n",
       "┌─────┬────────┬─────┬────────┬───┬────────────┬───────┬──────────┬──────────────┐\n",
       "│ A   ┆ fruits ┆ B   ┆ cars   ┆ … ┆ fruits_sum ┆ B_sum ┆ cars_sum ┆ optional_sum │\n",
       "│ --- ┆ ---    ┆ --- ┆ ---    ┆   ┆ ---        ┆ ---   ┆ ---      ┆ ---          │\n",
       "│ i64 ┆ str    ┆ i64 ┆ str    ┆   ┆ str        ┆ i64   ┆ str      ┆ i64          │\n",
       "╞═════╪════════╪═════╪════════╪═══╪════════════╪═══════╪══════════╪══════════════╡\n",
       "│ 1   ┆ banana ┆ 5   ┆ beetle ┆ … ┆ null       ┆ 15    ┆ null     ┆ 300          │\n",
       "│ 2   ┆ banana ┆ 4   ┆ audi   ┆ … ┆ null       ┆ 15    ┆ null     ┆ 300          │\n",
       "│ 3   ┆ apple  ┆ 3   ┆ beetle ┆ … ┆ null       ┆ 15    ┆ null     ┆ 300          │\n",
       "│ 4   ┆ apple  ┆ 2   ┆ beetle ┆ … ┆ null       ┆ 15    ┆ null     ┆ 300          │\n",
       "│ 5   ┆ banana ┆ 1   ┆ beetle ┆ … ┆ null       ┆ 15    ┆ null     ┆ 300          │\n",
       "└─────┴────────┴─────┴────────┴───┴────────────┴───────┴──────────┴──────────────┘"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 所有表达式并行地运行\n",
    "# 单值'Series'被广播到'DataFrame'的形状`\n",
    "\n",
    "(df.select([\n",
    "    pl.all(),\n",
    "    pl.all().sum().suffix(\"_sum\")\n",
    "]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "periodic-employee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 1)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>fruits</th></tr><tr><td>bool</td></tr></thead><tbody><tr><td>true</td></tr><tr><td>true</td></tr><tr><td>false</td></tr><tr><td>false</td></tr><tr><td>true</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 1)\n",
       "┌────────┐\n",
       "│ fruits │\n",
       "│ ---    │\n",
       "│ bool   │\n",
       "╞════════╡\n",
       "│ true   │\n",
       "│ true   │\n",
       "│ false  │\n",
       "│ false  │\n",
       "│ true   │\n",
       "└────────┘"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 专门函数有'str'和'dt'名称空间\n",
    "\n",
    "predicate = pl.col(\"fruits\").str.contains(\"^b.*\")\n",
    "\n",
    "(df.select([\n",
    "    predicate\n",
    "]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "initial-lithuania",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (3, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>A</th><th>fruits</th><th>B</th><th>cars</th><th>optional</th></tr><tr><td>i64</td><td>str</td><td>i64</td><td>str</td><td>i64</td></tr></thead><tbody><tr><td>1</td><td>&quot;banana&quot;</td><td>5</td><td>&quot;beetle&quot;</td><td>28</td></tr><tr><td>2</td><td>&quot;banana&quot;</td><td>4</td><td>&quot;audi&quot;</td><td>300</td></tr><tr><td>5</td><td>&quot;banana&quot;</td><td>1</td><td>&quot;beetle&quot;</td><td>-30</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (3, 5)\n",
       "┌─────┬────────┬─────┬────────┬──────────┐\n",
       "│ A   ┆ fruits ┆ B   ┆ cars   ┆ optional │\n",
       "│ --- ┆ ---    ┆ --- ┆ ---    ┆ ---      │\n",
       "│ i64 ┆ str    ┆ i64 ┆ str    ┆ i64      │\n",
       "╞═════╪════════╪═════╪════════╪══════════╡\n",
       "│ 1   ┆ banana ┆ 5   ┆ beetle ┆ 28       │\n",
       "│ 2   ┆ banana ┆ 4   ┆ audi   ┆ 300      │\n",
       "│ 5   ┆ banana ┆ 1   ┆ beetle ┆ -30      │\n",
       "└─────┴────────┴─────┴────────┴──────────┘"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 利用谓语（predicate）进行过滤\n",
    "\n",
    "df.filter(predicate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "brutal-irish",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (1, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>A</th><th>some_compute()</th></tr><tr><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>8</td><td>165</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (1, 2)\n",
       "┌─────┬────────────────┐\n",
       "│ A   ┆ some_compute() │\n",
       "│ --- ┆ ---            │\n",
       "│ i64 ┆ i64            │\n",
       "╞═════╪════════════════╡\n",
       "│ 8   ┆ 165            │\n",
       "└─────┴────────────────┘"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 谓语（predicate） 表达式可以被用于过滤\n",
    "\n",
    "(df.select([\n",
    "    pl.col(\"A\").filter(pl.col(\"fruits\").str.contains(\"^b.*\")).sum(),\n",
    "    (pl.col(\"B\").filter(pl.col(\"cars\").str.contains(\"^b.*\")).sum() * pl.col(\"B\").sum()).alias(\"some_compute()\"), # .alias方法表示另起一列\n",
    "]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "treated-intro",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 1)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>computed</th></tr><tr><td>f64</td></tr></thead><tbody><tr><td>0.002688</td></tr><tr><td>0.004301</td></tr><tr><td>0.004839</td></tr><tr><td>0.004301</td></tr><tr><td>0.002688</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 1)\n",
       "┌──────────┐\n",
       "│ computed │\n",
       "│ ---      │\n",
       "│ f64      │\n",
       "╞══════════╡\n",
       "│ 0.002688 │\n",
       "│ 0.004301 │\n",
       "│ 0.004839 │\n",
       "│ 0.004301 │\n",
       "│ 0.002688 │\n",
       "└──────────┘"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 我们可以对列和（文字）值进行算术运算\n",
    "# 可以在程序员不知道的情况下计算为1\n",
    "\n",
    "some_var = 1\n",
    "\n",
    "(df.select([\n",
    "    ((pl.col(\"A\") / 124.0 * pl.col(\"B\")) / pl.sum(\"B\") * some_var).alias(\"computed\")\n",
    "]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "looking-socket",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>fruits</th><th>B</th><th>b</th></tr><tr><td>str</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>&quot;banana&quot;</td><td>5</td><td>5</td></tr><tr><td>&quot;banana&quot;</td><td>4</td><td>4</td></tr><tr><td>&quot;apple&quot;</td><td>3</td><td>-1</td></tr><tr><td>&quot;apple&quot;</td><td>2</td><td>-1</td></tr><tr><td>&quot;banana&quot;</td><td>1</td><td>1</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 3)\n",
       "┌────────┬─────┬─────┐\n",
       "│ fruits ┆ B   ┆ b   │\n",
       "│ ---    ┆ --- ┆ --- │\n",
       "│ str    ┆ i64 ┆ i64 │\n",
       "╞════════╪═════╪═════╡\n",
       "│ banana ┆ 5   ┆ 5   │\n",
       "│ banana ┆ 4   ┆ 4   │\n",
       "│ apple  ┆ 3   ┆ -1  │\n",
       "│ apple  ┆ 2   ┆ -1  │\n",
       "│ banana ┆ 1   ┆ 1   │\n",
       "└────────┴─────┴─────┘"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 我们可以通过谓词（predicate）来组合列\n",
    "\n",
    "(df.select([\n",
    "    \"fruits\",\n",
    "    \"B\",\n",
    "    pl.when(pl.col(\"fruits\") == \"banana\").then(pl.col(\"B\")).otherwise(-1).alias(\"b\")\n",
    "]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "lesser-worse",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>A</th><th>B</th><th>fold</th></tr><tr><td>i64</td><td>i64</td><td>f64</td></tr></thead><tbody><tr><td>1</td><td>5</td><td>31.5</td></tr><tr><td>2</td><td>4</td><td>23.0</td></tr><tr><td>3</td><td>3</td><td>16.5</td></tr><tr><td>4</td><td>2</td><td>12.0</td></tr><tr><td>5</td><td>1</td><td>9.5</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 3)\n",
       "┌─────┬─────┬──────┐\n",
       "│ A   ┆ B   ┆ fold │\n",
       "│ --- ┆ --- ┆ ---  │\n",
       "│ i64 ┆ i64 ┆ f64  │\n",
       "╞═════╪═════╪══════╡\n",
       "│ 1   ┆ 5   ┆ 31.5 │\n",
       "│ 2   ┆ 4   ┆ 23.0 │\n",
       "│ 3   ┆ 3   ┆ 16.5 │\n",
       "│ 4   ┆ 2   ┆ 12.0 │\n",
       "│ 5   ┆ 1   ┆ 9.5  │\n",
       "└─────┴─────┴──────┘"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 我们可以在列这一层通过折叠操作组合列\n",
    "\n",
    "(df.select([\n",
    "    \"A\",\n",
    "    \"B\",\n",
    "    pl.fold(0, lambda a, b: a + b, [pl.col(\"A\"), \"B\", pl.col(\"B\")**2, pl.col(\"A\") / 2.0]).alias(\"fold\")\n",
    "]))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "mighty-civilization",
   "metadata": {},
   "source": [
    "# 聚合上下文\n",
    "* 表达式应用于组而不是列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "charitable-organ",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (2, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>fruits</th><th>B_sum</th><th>B_sum2</th><th>fruits_first</th><th>count</th><th>cars</th></tr><tr><td>str</td><td>i64</td><td>i64</td><td>str</td><td>u32</td><td>list[str]</td></tr></thead><tbody><tr><td>&quot;banana&quot;</td><td>10</td><td>10</td><td>&quot;banana&quot;</td><td>3</td><td>[&quot;beetle&quot;, &quot;beetle&quot;, &quot;audi&quot;]</td></tr><tr><td>&quot;apple&quot;</td><td>5</td><td>5</td><td>&quot;apple&quot;</td><td>2</td><td>[&quot;beetle&quot;, &quot;beetle&quot;]</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (2, 6)\n",
       "┌────────┬───────┬────────┬──────────────┬───────┬──────────────────────────────┐\n",
       "│ fruits ┆ B_sum ┆ B_sum2 ┆ fruits_first ┆ count ┆ cars                         │\n",
       "│ ---    ┆ ---   ┆ ---    ┆ ---          ┆ ---   ┆ ---                          │\n",
       "│ str    ┆ i64   ┆ i64    ┆ str          ┆ u32   ┆ list[str]                    │\n",
       "╞════════╪═══════╪════════╪══════════════╪═══════╪══════════════════════════════╡\n",
       "│ banana ┆ 10    ┆ 10     ┆ banana       ┆ 3     ┆ [\"beetle\", \"beetle\", \"audi\"] │\n",
       "│ apple  ┆ 5     ┆ 5      ┆ apple        ┆ 2     ┆ [\"beetle\", \"beetle\"]         │\n",
       "└────────┴───────┴────────┴──────────────┴───────┴──────────────────────────────┘"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 我们仍可以组合很多表达式\n",
    "\n",
    "(df.sort(\"cars\").group_by(\"fruits\")\n",
    "    .agg([\n",
    "        pl.col(\"B\").sum().alias(\"B_sum\"),\n",
    "        pl.sum(\"B\").alias(\"B_sum2\"),  # 第一个是语法糖\n",
    "        pl.first(\"fruits\").alias(\"fruits_first\"),\n",
    "        pl.count(\"A\").alias(\"count\"),\n",
    "        pl.col(\"cars\").reverse()\n",
    "    ]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "bacterial-comparative",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>fruits</th><th>B_sum</th><th>B_sum2</th><th>fruits_first</th><th>count</th><th>cars</th></tr><tr><td>str</td><td>i64</td><td>i64</td><td>str</td><td>u32</td><td>str</td></tr></thead><tbody><tr><td>&quot;apple&quot;</td><td>5</td><td>5</td><td>&quot;apple&quot;</td><td>2</td><td>&quot;beetle&quot;</td></tr><tr><td>&quot;apple&quot;</td><td>5</td><td>5</td><td>&quot;apple&quot;</td><td>2</td><td>&quot;beetle&quot;</td></tr><tr><td>&quot;banana&quot;</td><td>10</td><td>10</td><td>&quot;banana&quot;</td><td>3</td><td>&quot;beetle&quot;</td></tr><tr><td>&quot;banana&quot;</td><td>10</td><td>10</td><td>&quot;banana&quot;</td><td>3</td><td>&quot;beetle&quot;</td></tr><tr><td>&quot;banana&quot;</td><td>10</td><td>10</td><td>&quot;banana&quot;</td><td>3</td><td>&quot;audi&quot;</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 6)\n",
       "┌────────┬───────┬────────┬──────────────┬───────┬────────┐\n",
       "│ fruits ┆ B_sum ┆ B_sum2 ┆ fruits_first ┆ count ┆ cars   │\n",
       "│ ---    ┆ ---   ┆ ---    ┆ ---          ┆ ---   ┆ ---    │\n",
       "│ str    ┆ i64   ┆ i64    ┆ str          ┆ u32   ┆ str    │\n",
       "╞════════╪═══════╪════════╪══════════════╪═══════╪════════╡\n",
       "│ apple  ┆ 5     ┆ 5      ┆ apple        ┆ 2     ┆ beetle │\n",
       "│ apple  ┆ 5     ┆ 5      ┆ apple        ┆ 2     ┆ beetle │\n",
       "│ banana ┆ 10    ┆ 10     ┆ banana       ┆ 3     ┆ beetle │\n",
       "│ banana ┆ 10    ┆ 10     ┆ banana       ┆ 3     ┆ beetle │\n",
       "│ banana ┆ 10    ┆ 10     ┆ banana       ┆ 3     ┆ audi   │\n",
       "└────────┴───────┴────────┴──────────────┴───────┴────────┘"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 我们可以从“汽车”列下手\n",
    "\n",
    "(df.sort(\"cars\").group_by(\"fruits\")\n",
    "    .agg([\n",
    "        pl.col(\"B\").sum().alias(\"B_sum\"),\n",
    "        pl.sum(\"B\").alias(\"B_sum2\"),  # 第一个是语法糖\n",
    "        pl.first(\"fruits\").alias(\"fruits_first\"),\n",
    "        pl.count(\"A\").alias(\"count\"),\n",
    "        pl.col(\"cars\").reverse()\n",
    "    ])).explode(\"cars\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "willing-vancouver",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>fruits</th><th>B_sum</th><th>B_sum2</th><th>fruits_first</th><th>count</th><th>B_shifted</th></tr><tr><td>str</td><td>i64</td><td>i64</td><td>str</td><td>u32</td><td>i64</td></tr></thead><tbody><tr><td>&quot;banana&quot;</td><td>10</td><td>10</td><td>&quot;banana&quot;</td><td>3</td><td>null</td></tr><tr><td>&quot;banana&quot;</td><td>10</td><td>10</td><td>&quot;banana&quot;</td><td>3</td><td>5</td></tr><tr><td>&quot;banana&quot;</td><td>10</td><td>10</td><td>&quot;banana&quot;</td><td>3</td><td>4</td></tr><tr><td>&quot;apple&quot;</td><td>5</td><td>5</td><td>&quot;apple&quot;</td><td>2</td><td>null</td></tr><tr><td>&quot;apple&quot;</td><td>5</td><td>5</td><td>&quot;apple&quot;</td><td>2</td><td>3</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 6)\n",
       "┌────────┬───────┬────────┬──────────────┬───────┬───────────┐\n",
       "│ fruits ┆ B_sum ┆ B_sum2 ┆ fruits_first ┆ count ┆ B_shifted │\n",
       "│ ---    ┆ ---   ┆ ---    ┆ ---          ┆ ---   ┆ ---       │\n",
       "│ str    ┆ i64   ┆ i64    ┆ str          ┆ u32   ┆ i64       │\n",
       "╞════════╪═══════╪════════╪══════════════╪═══════╪═══════════╡\n",
       "│ banana ┆ 10    ┆ 10     ┆ banana       ┆ 3     ┆ null      │\n",
       "│ banana ┆ 10    ┆ 10     ┆ banana       ┆ 3     ┆ 5         │\n",
       "│ banana ┆ 10    ┆ 10     ┆ banana       ┆ 3     ┆ 4         │\n",
       "│ apple  ┆ 5     ┆ 5      ┆ apple        ┆ 2     ┆ null      │\n",
       "│ apple  ┆ 5     ┆ 5      ┆ apple        ┆ 2     ┆ 3         │\n",
       "└────────┴───────┴────────┴──────────────┴───────┴───────────┘"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(df.group_by(\"fruits\")\n",
    "    .agg([\n",
    "        pl.col(\"B\").sum().alias(\"B_sum\"),\n",
    "        pl.sum(\"B\").alias(\"B_sum2\"),  # 第一个是语法糖\n",
    "        pl.first(\"fruits\").alias(\"fruits_first\"),\n",
    "        pl.count(),\n",
    "        pl.col(\"B\").shift().alias(\"B_shifted\")\n",
    "    ])\n",
    " .explode(\"B_shifted\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "46e1ce19",
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>fruits</th><th>B</th><th>B_sum2</th><th>fruits_first</th><th>count</th><th>cars</th></tr><tr><td>str</td><td>i64</td><td>i64</td><td>str</td><td>u32</td><td>str</td></tr></thead><tbody><tr><td>&quot;apple&quot;</td><td>5</td><td>5</td><td>&quot;apple&quot;</td><td>2</td><td>&quot;beetle&quot;</td></tr><tr><td>&quot;apple&quot;</td><td>5</td><td>5</td><td>&quot;apple&quot;</td><td>2</td><td>&quot;beetle&quot;</td></tr><tr><td>&quot;banana&quot;</td><td>10</td><td>10</td><td>&quot;banana&quot;</td><td>3</td><td>&quot;beetle&quot;</td></tr><tr><td>&quot;banana&quot;</td><td>10</td><td>10</td><td>&quot;banana&quot;</td><td>3</td><td>&quot;beetle&quot;</td></tr><tr><td>&quot;banana&quot;</td><td>10</td><td>10</td><td>&quot;banana&quot;</td><td>3</td><td>&quot;audi&quot;</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 6)\n",
       "┌────────┬─────┬────────┬──────────────┬───────┬────────┐\n",
       "│ fruits ┆ B   ┆ B_sum2 ┆ fruits_first ┆ count ┆ cars   │\n",
       "│ ---    ┆ --- ┆ ---    ┆ ---          ┆ ---   ┆ ---    │\n",
       "│ str    ┆ i64 ┆ i64    ┆ str          ┆ u32   ┆ str    │\n",
       "╞════════╪═════╪════════╪══════════════╪═══════╪════════╡\n",
       "│ apple  ┆ 5   ┆ 5      ┆ apple        ┆ 2     ┆ beetle │\n",
       "│ apple  ┆ 5   ┆ 5      ┆ apple        ┆ 2     ┆ beetle │\n",
       "│ banana ┆ 10  ┆ 10     ┆ banana       ┆ 3     ┆ beetle │\n",
       "│ banana ┆ 10  ┆ 10     ┆ banana       ┆ 3     ┆ beetle │\n",
       "│ banana ┆ 10  ┆ 10     ┆ banana       ┆ 3     ┆ audi   │\n",
       "└────────┴─────┴────────┴──────────────┴───────┴────────┘"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 我们可以从“汽车”列下手\n",
    "\n",
    "(df.sort(\"cars\").group_by(\"fruits\")\n",
    "    .agg([\n",
    "        pl.col(\"B\").sum(),\n",
    "        pl.sum(\"B\").alias(\"B_sum2\"),  # 第一个是语法糖\n",
    "        pl.first(\"fruits\").alias(\"fruits_first\"),\n",
    "        pl.count(\"A\").alias(\"count\"),\n",
    "        pl.col(\"cars\").reverse()\n",
    "    ])).explode(\"cars\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "novel-attribute",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (2, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>fruits</th><th>shift_B</th><th>rev_B</th></tr><tr><td>str</td><td>list[i64]</td><td>list[i64]</td></tr></thead><tbody><tr><td>&quot;apple&quot;</td><td>[null, 3]</td><td>[2, 3]</td></tr><tr><td>&quot;banana&quot;</td><td>[null, 5, 4]</td><td>[1, 4, 5]</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (2, 3)\n",
       "┌────────┬──────────────┬───────────┐\n",
       "│ fruits ┆ shift_B      ┆ rev_B     │\n",
       "│ ---    ┆ ---          ┆ ---       │\n",
       "│ str    ┆ list[i64]    ┆ list[i64] │\n",
       "╞════════╪══════════════╪═══════════╡\n",
       "│ apple  ┆ [null, 3]    ┆ [2, 3]    │\n",
       "│ banana ┆ [null, 5, 4] ┆ [1, 4, 5] │\n",
       "└────────┴──────────────┴───────────┘"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 我们也可以得到一列组\n",
    "\n",
    "(df.group_by(\"fruits\")\n",
    "    .agg([\n",
    "         pl.col(\"B\").shift().alias(\"shift_B\"),\n",
    "         pl.col(\"B\").reverse().alias(\"rev_B\"),\n",
    "    ]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "biological-cooper",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (2, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>fruits</th><th>B</th></tr><tr><td>str</td><td>list[list[i64]]</td></tr></thead><tbody><tr><td>&quot;banana&quot;</td><td>[[5, 4]]</td></tr><tr><td>&quot;apple&quot;</td><td>[[3, 2]]</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (2, 2)\n",
       "┌────────┬─────────────────┐\n",
       "│ fruits ┆ B               │\n",
       "│ ---    ┆ ---             │\n",
       "│ str    ┆ list[list[i64]] │\n",
       "╞════════╪═════════════════╡\n",
       "│ banana ┆ [[5, 4]]        │\n",
       "│ apple  ┆ [[3, 2]]        │\n",
       "└────────┴─────────────────┘"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 我们也可以在groupby中使用谓词\n",
    "\n",
    "(df.group_by(\"fruits\")\n",
    "    .agg([\n",
    "        pl.col(\"B\").filter(pl.col(\"B\") > 1).implode().keep_name(),\n",
    "    ]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "linear-stability",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (2, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>fruits</th><th>B</th></tr><tr><td>str</td><td>f64</td></tr></thead><tbody><tr><td>&quot;banana&quot;</td><td>4.5</td></tr><tr><td>&quot;apple&quot;</td><td>2.5</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (2, 2)\n",
       "┌────────┬─────┐\n",
       "│ fruits ┆ B   │\n",
       "│ ---    ┆ --- │\n",
       "│ str    ┆ f64 │\n",
       "╞════════╪═════╡\n",
       "│ banana ┆ 4.5 │\n",
       "│ apple  ┆ 2.5 │\n",
       "└────────┴─────┘"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 并且只根据谓词为真的值求和\n",
    "\n",
    "(df.group_by(\"fruits\")\n",
    "    .agg([\n",
    "        pl.col(\"B\").filter(pl.col(\"B\") > 1).mean(),\n",
    "    ]))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "distinct-master",
   "metadata": {},
   "source": [
    "# 窗口函数!\n",
    "\n",
    "* 用“超能力表”达。\n",
    "* 选择上下文中的聚合\n",
    "\n",
    "\n",
    "```python\n",
    "pl.col(\"foo\").aggregation_expression(..).over(\"column_used_to_group\")\n",
    "```\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "chief-eight",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>fruits</th><th>cars</th><th>B</th><th>B_sum_by_fruits</th><th>B_sum_by_cars</th></tr><tr><td>str</td><td>str</td><td>i64</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>&quot;banana&quot;</td><td>&quot;beetle&quot;</td><td>5</td><td>10</td><td>11</td></tr><tr><td>&quot;banana&quot;</td><td>&quot;audi&quot;</td><td>4</td><td>10</td><td>4</td></tr><tr><td>&quot;apple&quot;</td><td>&quot;beetle&quot;</td><td>3</td><td>5</td><td>11</td></tr><tr><td>&quot;apple&quot;</td><td>&quot;beetle&quot;</td><td>2</td><td>5</td><td>11</td></tr><tr><td>&quot;banana&quot;</td><td>&quot;beetle&quot;</td><td>1</td><td>10</td><td>11</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 5)\n",
       "┌────────┬────────┬─────┬─────────────────┬───────────────┐\n",
       "│ fruits ┆ cars   ┆ B   ┆ B_sum_by_fruits ┆ B_sum_by_cars │\n",
       "│ ---    ┆ ---    ┆ --- ┆ ---             ┆ ---           │\n",
       "│ str    ┆ str    ┆ i64 ┆ i64             ┆ i64           │\n",
       "╞════════╪════════╪═════╪═════════════════╪═══════════════╡\n",
       "│ banana ┆ beetle ┆ 5   ┆ 10              ┆ 11            │\n",
       "│ banana ┆ audi   ┆ 4   ┆ 10              ┆ 4             │\n",
       "│ apple  ┆ beetle ┆ 3   ┆ 5               ┆ 11            │\n",
       "│ apple  ┆ beetle ┆ 2   ┆ 5               ┆ 11            │\n",
       "│ banana ┆ beetle ┆ 1   ┆ 10              ┆ 11            │\n",
       "└────────┴────────┴─────┴─────────────────┴───────────────┘"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# groupby 2个不同的列\n",
    "\n",
    "(df.select([\n",
    "    \"fruits\",\n",
    "    \"cars\",\n",
    "    \"B\",\n",
    "    pl.col(\"B\").sum().over(\"fruits\").alias(\"B_sum_by_fruits\"),\n",
    "    pl.col(\"B\").sum().over(\"cars\").alias(\"B_sum_by_cars\"),\n",
    "]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "flexible-medium",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>fruits</th><th>B</th><th>B_reversed_by_fruits</th></tr><tr><td>str</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>&quot;banana&quot;</td><td>5</td><td>1</td></tr><tr><td>&quot;banana&quot;</td><td>4</td><td>4</td></tr><tr><td>&quot;apple&quot;</td><td>3</td><td>2</td></tr><tr><td>&quot;apple&quot;</td><td>2</td><td>3</td></tr><tr><td>&quot;banana&quot;</td><td>1</td><td>5</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 3)\n",
       "┌────────┬─────┬──────────────────────┐\n",
       "│ fruits ┆ B   ┆ B_reversed_by_fruits │\n",
       "│ ---    ┆ --- ┆ ---                  │\n",
       "│ str    ┆ i64 ┆ i64                  │\n",
       "╞════════╪═════╪══════════════════════╡\n",
       "│ banana ┆ 5   ┆ 1                    │\n",
       "│ banana ┆ 4   ┆ 4                    │\n",
       "│ apple  ┆ 3   ┆ 2                    │\n",
       "│ apple  ┆ 2   ┆ 3                    │\n",
       "│ banana ┆ 1   ┆ 5                    │\n",
       "└────────┴─────┴──────────────────────┘"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 按组反转B，并在原始df中显示结果\n",
    "\n",
    "(df.select([\n",
    "    \"fruits\",\n",
    "    \"B\",\n",
    "    pl.col(\"B\").reverse().over(\"fruits\").alias(\"B_reversed_by_fruits\")\n",
    "]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "specialized-figure",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr > th,\n",
       ".dataframe > tbody > tr > td {\n",
       "  text-align: right;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>fruits</th><th>B</th><th>lag_B_by_fruits</th></tr><tr><td>str</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>&quot;banana&quot;</td><td>5</td><td>null</td></tr><tr><td>&quot;banana&quot;</td><td>4</td><td>5</td></tr><tr><td>&quot;apple&quot;</td><td>3</td><td>null</td></tr><tr><td>&quot;apple&quot;</td><td>2</td><td>3</td></tr><tr><td>&quot;banana&quot;</td><td>1</td><td>4</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 3)\n",
       "┌────────┬─────┬─────────────────┐\n",
       "│ fruits ┆ B   ┆ lag_B_by_fruits │\n",
       "│ ---    ┆ --- ┆ ---             │\n",
       "│ str    ┆ i64 ┆ i64             │\n",
       "╞════════╪═════╪═════════════════╡\n",
       "│ banana ┆ 5   ┆ null            │\n",
       "│ banana ┆ 4   ┆ 5               │\n",
       "│ apple  ┆ 3   ┆ null            │\n",
       "│ apple  ┆ 2   ┆ 3               │\n",
       "│ banana ┆ 1   ┆ 4               │\n",
       "└────────┴─────┴─────────────────┘"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 将一列置于“fruits”中\n",
    "\n",
    "(df.select([\n",
    "    \"fruits\",\n",
    "    \"B\",\n",
    "    pl.col(\"B\").shift().over(\"fruits\").alias(\"lag_B_by_fruits\")\n",
    "]))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {},
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
