Reference

Page Classes and Functions

class PageBase(**kwargs)
__getitem__(key)
__setitem__(key, value)
do_load()
fetch()
get_data()
property loader
prune()
raw = <otscrape.core.base.extractor.Extractor object>
class CSVLinePage(**kwargs)
loader = <otscrape.core.loader.file.csv.CSVFileLoader object>
class DataPage(data)
loader = <otscrape.core.loader.dummy.DummyLoader object>
class FileLinePage(**kwargs)
loader = <otscrape.core.loader.file.file.LineLoader object>
class JSONLinePage(**kwargs)
loader = <otscrape.core.loader.file.json.JSONFileLoader object>
class Page(url=None, **kwargs)
loader = <otscrape.core.loader.request.loader.SimpleRequestLoader object>

Extractor Classes and Functions

class Extractor(target=None, project=True, replace_error=None)
on_error(*args, **kwargs)
class Chain(extractors: List[Union[otscrape.core.base.extractor.Extractor, Callable]], *, target=None, project=True)
extract(page, cache)
class ChainMap(extractors: List[Union[otscrape.core.base.extractor.Extractor, Callable]], *, target=None, project=True)
class DictPath(path='/', target=None, *, project=True, replace_error=None)
extract(page, cache)
class ETree(target=None, project=True, replace_error=None)
extract(page, cache)
class FileContent(target=None, *, project=True, replace_error=None, **kwargs)
extract(page, cache)
class FileLineNumber(target=None, *, project=True, replace_error=None, **kwargs)
extract(page, cache)
class FileName(target=None, *, project=True, replace_error=None, **kwargs)
extract(page, cache)
class JSON(path='/', target=None, *, project=True, replace_error=None, **kwargs)
extract(page, cache)
class JSONDict(target=None, *, project=True, replace_error=None, **kwargs)
extract(page, cache)
class Lambda(func, target=None, project=True, replace_error=None)
extract(page, cache)
class Map(func, target=None, project=True, replace_error=None)
class Raw(*, project=True, replace_error=None)
extract(page, cache)
class RegEx(pattern, flags='', only_first=False, select=None, target=None, *, project=True, replace_error=None, **kwargs)
extract(page, cache)
class RequestJSON(target=None, project=True, replace_error=None)
extract(page, cache)
class RequestStatusCode(target=None, project=True, replace_error=None)
extract(page, cache)
class RequestText(target=None, *, bytes_result=False, encoding=None, project=True, replace_error=None)
extract(page, cache)
class SoupFindAll(name=None, attrs={}, recursive=True, string=None, limit=None, *, default_parser='html.parser', target=None, project=True, replace_error=None, **kwargs)
extract(page, cache)
class SoupSelect(selector, namespaces=None, limit=None, *, default_parser='html.parser', multiple=True, target=None, project=True, replace_error=None, **kwargs)
extract(page, cache)
class StarLambda(func, target=None, project=True, replace_error=None)
extract(page, cache)
class StarMap(func, target=None, project=True, replace_error=None)
class TextSoup(parser='html.parser', target=None, *, project=True, replace_error=None, **kwargs)
extract(page, cache)
class XPath(xpath='.', *, target=None, only_first=False, encoding=None, project=True, replace_error=None)
extract(page, cache)
class ZipDict(structure=None, *, project=True, replace_error=None)
extract(page, cache)
class Attribute(target=None, project=True, replace_error=None)
extractor(func=None, *, project=True, replace_error=None)

Exporter Classes and Functions

class Exporter(queue_size=0, queue_timeout=3, parallel=False)
close()
export(data)
join_queue()
join_worker()
on_close()
on_error(*args, **kwargs)
on_open()
open()
class JSONExporter(filename, mode='w', encoding=None, lines=True, **kwargs)
get_data_to_write(data)

Loader Classes and Functions

class Loader(rate_limit='')
check_available(lock=True)
do_load(*args, **kwargs)
do_on_loading()
get_available_time()
on_available()
on_loaded()
on_loading()
class CSVFileLoader(filenames=None, rate_limit='', fetch_size=None, skiprows=0, parallel=False, **kwargs)
calculate_tot_line()
do_load()
get_line_reader(filename)
class DummyLoader(data=None, rate_limit='')
do_load(data=None)
class JSONFileLoader(filenames=None, rate_limit='', fetch_size=None, skiprows=0, parallel=False, **kwargs)
do_load()
class LineLoader(filenames=None, rate_limit='', fetch_size=None, skiprows=0, parallel=False, **kwargs)
calculate_tot_line()
do_load()
get_fetcher()
get_line_reader(filename)
get_manager()
on_loading()
reset()
class SimpleRequestLoader(method=None, accept_status_codes=(200,), max_retries=0, delay=0, replace_error=None, **kwargs)
on_error(*args, **kwargs)

Worker Classes and Functions

class Workers(n_workers=None, state=None, restart=False)
export(page, exporter, **kwargs)
list(elements=None)
scrape(page, buffer='FIFO', buffer_size=0, buffer_timeout=3.0)
PickleState(filename, replace=False)
class PickleStateBase(filename=None)
static clean(filename)
complete()
static exists(filename)
get_subpath(name)
hold()
is_complete(state=None, name=None)
is_waiting(state)
iter(it, if_exists='skip', key=None)
static load(filename)
notify(other)
release()
reset()
save()
substate(name=None, suffix=None)
try_complete()
wait_for(other, notify=True)