diff --git a/apiserver/plane/api/views/issue.py b/apiserver/plane/api/views/issue.py index efbdf07f9b..9e8579526f 100644 --- a/apiserver/plane/api/views/issue.py +++ b/apiserver/plane/api/views/issue.py @@ -58,7 +58,7 @@ from plane.bgtasks.storage_metadata_task import get_asset_object_metadata from .base import BaseAPIView from plane.utils.host import base_host from plane.bgtasks.webhook_task import model_activity - +from plane.bgtasks.work_item_link_task import crawl_work_item_link_title class WorkspaceIssueAPIEndpoint(BaseAPIView): """ @@ -692,6 +692,9 @@ class IssueLinkAPIEndpoint(BaseAPIView): serializer = IssueLinkSerializer(data=request.data) if serializer.is_valid(): serializer.save(project_id=project_id, issue_id=issue_id) + crawl_work_item_link_title.delay( + serializer.data.get("id"), serializer.data.get("url") + ) link = IssueLink.objects.get(pk=serializer.data["id"]) link.created_by_id = request.data.get("created_by", request.user.id) @@ -719,6 +722,9 @@ class IssueLinkAPIEndpoint(BaseAPIView): serializer = IssueLinkSerializer(issue_link, data=request.data, partial=True) if serializer.is_valid(): serializer.save() + crawl_work_item_link_title.delay( + serializer.data.get("id"), serializer.data.get("url") + ) issue_activity.delay( type="link.activity.updated", requested_data=requested_data, diff --git a/apiserver/plane/app/views/issue/link.py b/apiserver/plane/app/views/issue/link.py index 099550b846..0a574dc19e 100644 --- a/apiserver/plane/app/views/issue/link.py +++ b/apiserver/plane/app/views/issue/link.py @@ -45,7 +45,7 @@ class IssueLinkViewSet(BaseViewSet): serializer = IssueLinkSerializer(data=request.data) if serializer.is_valid(): serializer.save(project_id=project_id, issue_id=issue_id) - crawl_work_item_link_title( + crawl_work_item_link_title.delay( serializer.data.get("id"), serializer.data.get("url") ) issue_activity.delay( @@ -78,7 +78,7 @@ class IssueLinkViewSet(BaseViewSet): serializer = IssueLinkSerializer(issue_link, data=request.data, partial=True) if serializer.is_valid(): serializer.save() - crawl_work_item_link_title( + crawl_work_item_link_title.delay( serializer.data.get("id"), serializer.data.get("url") ) diff --git a/apiserver/plane/bgtasks/work_item_link_task.py b/apiserver/plane/bgtasks/work_item_link_task.py index 9a3ac265e1..1ba48caf91 100644 --- a/apiserver/plane/bgtasks/work_item_link_task.py +++ b/apiserver/plane/bgtasks/work_item_link_task.py @@ -19,17 +19,6 @@ logger = logging.getLogger("plane.worker") DEFAULT_FAVICON = "PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiIGNsYXNzPSJsdWNpZGUgbHVjaWRlLWxpbmstaWNvbiBsdWNpZGUtbGluayI+PHBhdGggZD0iTTEwIDEzYTUgNSAwIDAgMCA3LjU0LjU0bDMtM2E1IDUgMCAwIDAtNy4wNy03LjA3bC0xLjcyIDEuNzEiLz48cGF0aCBkPSJNMTQgMTFhNSA1IDAgMCAwLTcuNTQtLjU0bC0zIDNhNSA1IDAgMCAwIDcuMDcgNy4wN2wxLjcxLTEuNzEiLz48L3N2Zz4=" # noqa: E501 - -@shared_task -def crawl_work_item_link_title(id: str, url: str) -> None: - meta_data = crawl_work_item_link_title_and_favicon(url) - issue_link = IssueLink.objects.get(id=id) - - issue_link.metadata = meta_data - - issue_link.save() - - def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: """ Crawls a URL to extract the title and favicon. @@ -57,17 +46,18 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" # noqa: E501 } - # Fetch the main page - response = requests.get(url, headers=headers, timeout=2) + soup = None + title = None - response.raise_for_status() + try: + response = requests.get(url, headers=headers, timeout=1) - # Parse HTML - soup = BeautifulSoup(response.content, "html.parser") + soup = BeautifulSoup(response.content, "html.parser") + title_tag = soup.find("title") + title = title_tag.get_text().strip() if title_tag else None - # Extract title - title_tag = soup.find("title") - title = title_tag.get_text().strip() if title_tag else None + except requests.RequestException as e: + logger.warning(f"Failed to fetch HTML for title: {str(e)}") # Fetch and encode favicon favicon_base64 = fetch_and_encode_favicon(headers, soup, url) @@ -82,14 +72,6 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: return result - except requests.RequestException as e: - log_exception(e) - return { - "error": f"Request failed: {str(e)}", - "title": None, - "favicon": None, - "url": url, - } except Exception as e: log_exception(e) return { @@ -100,7 +82,7 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: } -def find_favicon_url(soup: BeautifulSoup, base_url: str) -> Optional[str]: +def find_favicon_url(soup: Optional[BeautifulSoup], base_url: str) -> Optional[str]: """ Find the favicon URL from HTML soup. @@ -111,18 +93,20 @@ def find_favicon_url(soup: BeautifulSoup, base_url: str) -> Optional[str]: Returns: str: Absolute URL to favicon or None """ - # Look for various favicon link tags - favicon_selectors = [ - 'link[rel="icon"]', - 'link[rel="shortcut icon"]', - 'link[rel="apple-touch-icon"]', - 'link[rel="apple-touch-icon-precomposed"]', - ] - for selector in favicon_selectors: - favicon_tag = soup.select_one(selector) - if favicon_tag and favicon_tag.get("href"): - return urljoin(base_url, favicon_tag["href"]) + if soup is not None: + # Look for various favicon link tags + favicon_selectors = [ + 'link[rel="icon"]', + 'link[rel="shortcut icon"]', + 'link[rel="apple-touch-icon"]', + 'link[rel="apple-touch-icon-precomposed"]', + ] + + for selector in favicon_selectors: + favicon_tag = soup.select_one(selector) + if favicon_tag and favicon_tag.get("href"): + return urljoin(base_url, favicon_tag["href"]) # Fallback to /favicon.ico parsed_url = urlparse(base_url) @@ -131,7 +115,6 @@ def find_favicon_url(soup: BeautifulSoup, base_url: str) -> Optional[str]: # Check if fallback exists try: response = requests.head(fallback_url, timeout=2) - response.raise_for_status() if response.status_code == 200: return fallback_url except requests.RequestException as e: @@ -142,8 +125,8 @@ def find_favicon_url(soup: BeautifulSoup, base_url: str) -> Optional[str]: def fetch_and_encode_favicon( - headers: Dict[str, str], soup: BeautifulSoup, url: str -) -> Optional[Dict[str, str]]: + headers: Dict[str, str], soup: Optional[BeautifulSoup], url: str +) -> Dict[str, Optional[str]]: """ Fetch favicon and encode it as base64. @@ -162,8 +145,7 @@ def fetch_and_encode_favicon( "favicon_base64": f"data:image/svg+xml;base64,{DEFAULT_FAVICON}", } - response = requests.get(favicon_url, headers=headers, timeout=2) - response.raise_for_status() + response = requests.get(favicon_url, headers=headers, timeout=1) # Get content type content_type = response.headers.get("content-type", "image/x-icon") @@ -183,3 +165,13 @@ def fetch_and_encode_favicon( "favicon_url": None, "favicon_base64": f"data:image/svg+xml;base64,{DEFAULT_FAVICON}", } + + +@shared_task +def crawl_work_item_link_title(id: str, url: str) -> None: + meta_data = crawl_work_item_link_title_and_favicon(url) + issue_link = IssueLink.objects.get(id=id) + + issue_link.metadata = meta_data + + issue_link.save()