# Source code for text_extensions_for_pandas.jupyter.misc

#
#  Copyright (c) 2021 IBM Corp.
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

#
# misc.py
#
# Part of text_extensions_for_pandas
#
# Home for various functions too limited in scope or size to justify a separate module
#

import pandas as pd
import time
from typing import *


def run_with_progress_bar(num_items: int, fn: Callable, item_type: str = "doc") \
        -> List[pd.DataFrame]:
    """
    Display a progress bar while iterating over a list of dataframes.

    Calls ``fn(i)`` once for every ``i`` in ``range(num_items)``, in order,
    and collects the return values. An ``ipywidgets.IntProgress`` widget is
    displayed before the first call and refreshed at most once every 0.1
    seconds while the loop runs, then once more when the loop ends.

    :param num_items: Number of items to iterate over
    :param fn: A function that accepts a single integer argument -- let's
     call it `i` -- and performs processing for document `i` and returns
     a `pd.DataFrame` of results
    :param item_type: Human-readable name for the items that the calling
     code is iterating over. An "s" is appended to form the label shown next
     to the bar, e.g. "3/10 docs".
    :returns: A list holding the return value of ``fn(i)`` for each `i`,
     in call order.
    :raises: Whatever `fn` raises. The exception is propagated unchanged
     after the bar has been updated to show how many items completed and
     switched to the "danger" style.
    """
    # Imports inline to avoid creating a hard dependency on ipywidgets/IPython
    # for programs that don't call this function.
    # noinspection PyPackageRequirements
    import ipywidgets
    # noinspection PyPackageRequirements
    from IPython.display import display

    # Minimum number of seconds between two refreshes of the widget, so that a
    # fast `fn` does not spend most of its time updating the progress bar.
    _UPDATE_SEC = 0.1
    result: List[pd.DataFrame] = []
    progress_bar = ipywidgets.IntProgress(0, 0, num_items,
                                          description="Starting...",
                                          layout=ipywidgets.Layout(width="100%"),
                                          style={"description_width": "12%"})
    display(progress_bar)

    def _show_progress() -> None:
        # Sync the widget with the number of items completed so far.
        num_done = len(result)
        progress_bar.value = num_done
        progress_bar.description = f"{num_done}/{num_items} {item_type}s"

    # Use the monotonic clock for the throttle: unlike time.time() it cannot
    # jump backwards or forwards when the system clock is adjusted.
    last_update = time.monotonic()
    try:
        for i in range(num_items):
            result.append(fn(i))
            now = time.monotonic()
            if now - last_update >= _UPDATE_SEC:
                _show_progress()
                last_update = now
    except BaseException:
        # Also covers KeyboardInterrupt (an interrupted notebook cell). Leave
        # the bar showing how far we got and flag it as failed, then let the
        # original exception propagate untouched.
        _show_progress()
        progress_bar.bar_style = "danger"
        raise

    # Final refresh happens unconditionally so the bar always ends on the true
    # count -- including the zero-item case, where the loop body never runs.
    _show_progress()
    progress_bar.bar_style = "success"
    return result