import logging
from typing import Literal

import requests

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def fetch_content(
    url,
    return_format: Literal["text", "markdown", "html", "screenshot"] = "markdown",
    disable_gfm: Literal[False, True, "table"] = False,
    bypass_cache=False,
    with_generated_alt=False,
    remove_images=False,
    timeout=None,
    json_response=True,
    with_links_summary=False,
    with_images_summary=False,
):
    """
    Fetch content from a specified URL using the r.jina.ai API.

    Each option is translated into a request header; the request itself is a
    plain GET to ``https://r.jina.ai/<url>``.

    :param url: The URL to fetch content from.
    :param return_format: The format of the returned content, options are
        'text', 'markdown', 'html', 'screenshot' (sent as ``X-Return-Format``).
    :param disable_gfm: ``True`` to disable Github Flavored Markdown, or
        'table' for table mode (sent as ``X-No-Gfm``). ``False`` omits the header.
    :param bypass_cache: Boolean flag to bypass cache (``X-No-Cache``).
    :param with_generated_alt: Boolean flag to include generated alt text for
        images (``X-With-Generated-Alt``).
    :param remove_images: Boolean flag to remove all images from the response
        (``X-Retain-Images: none``).
    :param timeout: Timeout in seconds for waiting the webpage to load
        (``X-Timeout``). When numeric, the client-side HTTP timeout is also
        raised so the request is not aborted before that wait can elapse.
    :param json_response: Boolean flag to toggle JSON response format
        (``Accept: application/json``).
    :param with_links_summary: Boolean flag to include a summary of all links
        at the end (``X-With-Links-Summary``).
    :param with_images_summary: Boolean flag to include a summary of all
        images at the end (``X-With-Images-Summary``).
    :return: The response body as text on success. On any
        ``requests.exceptions.RequestException`` (including HTTP error status)
        no exception is raised; the string ``"error: <message>"`` is returned.
    """
    api_url = f"https://r.jina.ai/{url}"

    headers = {"X-Return-Format": return_format}
    if bypass_cache:
        headers["X-No-Cache"] = "true"
    if remove_images:
        headers["X-Retain-Images"] = "none"
    if disable_gfm is not False:
        # Equality (not identity) is kept on purpose so that truthy values
        # equal to True still map to "true"; everything else means "table".
        headers["X-No-Gfm"] = "true" if disable_gfm == True else "table"  # noqa: E712
    if with_generated_alt:
        headers["X-With-Generated-Alt"] = "true"
    if with_links_summary:
        headers["X-With-Links-Summary"] = "true"
    if with_images_summary:
        headers["X-With-Images-Summary"] = "true"
    if json_response:
        headers["Accept"] = "application/json"

    # Client-side HTTP timeout: 10 s by default. It must not be shorter than
    # the page-load wait requested via X-Timeout, otherwise the client gives
    # up before the service has had a chance to respond.
    request_timeout = 10
    if timeout is not None:
        headers["X-Timeout"] = str(timeout)
        try:
            # Small margin on top of the requested wait for transfer overhead.
            request_timeout = max(request_timeout, float(timeout) + 5)
        except (TypeError, ValueError):
            # Non-numeric timeout: still forwarded in the header as before,
            # but the client-side timeout stays at its default.
            pass

    try:
        response = requests.get(api_url, headers=headers, timeout=request_timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Failures are reported to the caller in-band, not raised.
        logger.error("Failed to fetch content from %s. Error: %s", url, e)
        return f"error: {str(e)}"

    logger.info(
        "Successfully fetched content from: %s in %s format", url, return_format
    )
    return response.text