import Bannnerimage from "../../../../images/logo/logo_lakebrains.webp";

/**
 * Static case-study content, one entry per page slug.
 *
 * Shape of each entry, as written below:
 *   - slugurl: path string identifying the case study ("/Acuity").
 *   - data[]:  records, each with
 *       - Banner[]:     sector, headingtext, headingpara and the banner image.
 *       - heading:      one-line summary of the case study.
 *       - casestudy:    `Problem` text plus `challenge[]` sections; every
 *                       section has a `heading` and a `challengeli[]` list of
 *                       `{ First: string }` bullet items.
 *       - companyinfo:  logourl, about / aboutpara, and `companysector[]`
 *                       label/value pairs stored as `{ first, secound }`.
 *       - customer[]:   currently empty.
 *
 * NOTE(review): the key spellings `First` and `secound` are kept exactly as
 * they were — presumably the rendering components read them by these names;
 * rename only together with every consumer.
 */
const data = [
  {
    slugurl: "/Acuity",
    data: [
      {
        Banner: [
          {
            sector: "Financial/ Technology",
            headingtext: "Generic Web Scraper (PDF Generator)",
            headingpara:
              "Generic Web Scraper is a Javascript (Node Js) Implementation which is responsible for opening a new browser then switch to the given url and generate pdf files.",
            image: Bannnerimage,
          },
        ],
        heading:
          "Generic solution to scrape or generate PDF of any URL",
        casestudy: {
          // Multi-line template literal: the line breaks and leading spaces
          // are part of the string value and are kept as authored.
          Problem:
            `Generic Web Scraper is a Javascript (Node Js) Implementation which is responsible for opening a new browser then switch to the given url and generate pdf files, Extract text from the given url and export html of the url and save all these files on S3 Bucket.
            There are some specifications like:
            All people's bio should be in the pdf, It should capture all the hidden content with images, cookie pop ups should not be visible and so on`,
          challenge: [
            {
              heading: "The Challenges:- ",
              challengeli: [
                { First: "Remove or accept cookies popups (need to remove that popups because content can be hidden behind these pop ups)" },
                { First: "Image Break in the pdf (Image should not be split into two pages e.g. first half of image in page1, second half in page 2.)" },
                { First: "Single page toggle sections: On page load only the first section gets expanded but we want all sections’ content available in the print i.e. all sections’ should be expanded before printing. E.g." },
                { First: "Single Page content popups: Names, photos & designations are available on the following page but bios are available only after click on image as popup in the same page. E.g." },
                { First: "Click Next scenarios: Content of the next person is visible only after click on next button( right arrow mark)" },
                { First: "Read more scenarios: Content is available on page load but read more/learn more needs to be clicked to view entire content." },
                { First: "On hover content: Some websites show content only on hover, but print should have content available in it" },
                { First: "Pagination:  In some websites you need to click on pagination buttons to view all bios" },
                { First: "Input questionnaires before you could visit the website. Like select countries from drop down. Yes or no question" },
                { First: "Debug option" },
                { First: "Remove blank pages from PDFs" },
                { First: "Deployment on aws lambda" },
              ],
            },
            {
              heading: "Solutions:-",
              challengeli: [
                // The two template-literal items in this list used to carry
                // literal double quotes around their text (one of them glued
                // to the end of the URL); those stray characters are removed.
                { First: `Handled cookie popups by Installing a chrome extension in the browser through puppeteer.
                Extension link : https://chrome.google.com/webstore/detail/i-still-dont-care-about-c/edibdbjcniadpccecjdfdjjppcpchdlm` },
                { First: "Resolved Image break problem  by resizing all the images by a fixed width" },
                { First: "Solved Single page toggle sections problem, On hover content, Input questionnaires  by stripping styles from the web page and keeping only that styles which is responsible for basic page structure. Eg: flex-box, margin, padding, height, width etc. Styling all hidden elements as visible so there should not be any hidden content." },
                { First: `Handling read more, next page, button clicks, Pagination etc by below approach
                Firstly capture all the elements which are acting as a button and whose css property “cursor” is “pointer”.` },
                { First: "Click on each element which doesn't have any url/href one by one and observe changes in DOM if any changes are there then generate a pdf. Open a new tab with url/href which satisfies a specific condition and then generate pdf or scrape data." },
                { First: "Saving a log file in S3 bucket which contains all  the major logs like status of a particular task, errors etc. to provide debug options" },
                { First: "To remove blank pages the logic is getting content of pages one by one and checking if there is any text available in the content then keeping that page otherwise removing the page" },
                { First: "For deployment on aws-lambda, Use chrome-aws-lambda (npm package) to open a browser instance. https://www.npmjs.com/package/chrome-aws-lambda Always a headless browser will be opened and Unable to use chrome plugins." },
              ],
            },
          ],
        },

        companyinfo: {
          // NOTE(review): "yasmin" does not look like a URL or an imported
          // asset — likely a placeholder. Left unchanged; confirm the
          // intended logo source.
          logourl:
            "yasmin",
          about: "About ",
          aboutpara:
            "Acuity Knowledge Partners (Acuity) is a leading provider of bespoke research, analytics and technology solutions to the financial services sector, including asset managers, corporate and investment banks, private equity and venture capital firms, hedge funds and consulting firms.",
          companysector: [
            {
              first: "Company Name:",
              secound: "Acuity Knowledge Partners",
            },
            {
              first: "INDUSTRY:",
              secound: "financial services",
            },
            {
              first: "LOCATION:",
              secound: " Bangalore, India",
            },
          ],
        },
        customer: [],
      },
    ],
  },
];

export default data;
