{
  "_id": "6a1ee727b401979e73411699",
  "Package": "contentanalysis",
  "Title": "Scientific Content and Citation Analysis from PDF Documents",
  "Version": "1.1.0.9000",
  "Authors@R": "c(\nperson(given = \"Massimo\",\nfamily = \"Aria\",\nrole = c(\"cre\", \"aut\", \"cph\"),\nemail = \"aria@unina.it\",\ncomment = c(ORCID = \"0000-0002-8517-9411\")),\nperson(given = \"Corrado\",\nfamily = \"Cuccurullo\",\nrole = \"aut\",\nemail = \"cuccurullocorrado@gmail.com\",\ncomment = c(ORCID = \"0000-0002-7401-8575\")))",
  "Description": "Provides comprehensive tools for extracting and analyzing\nscientific content from PDF documents, including citation\nextraction, reference matching, text analysis, and bibliometric\nindicators. Supports multi-column PDF layouts, 'CrossRef' API\n<https://www.crossref.org/documentation/retrieve-metadata/rest-api/>\nintegration, and advanced citation parsing.",
  "License": "GPL (>= 3)",
  "Encoding": "UTF-8",
  "Roxygen": "list(markdown = TRUE)",
  "URL": "https://github.com/massimoaria/contentanalysis,",
  "BugReports": "https://github.com/massimoaria/contentanalysis/issues",
  "VignetteBuilder": "knitr",
  "Config/testthat/edition": "3",
  "Config/roxygen2/version": "8.0.0",
  "Config/pak/sysreqs": "cmake libglpk-dev make libicu-dev libjpeg-dev\nlibuv1-dev libxml2-dev libssl-dev libpoppler-cpp-dev\npoppler-data",
  "Repository": "https://massimoaria.r-universe.dev",
  "Date/Publication": "2026-05-19 15:55:51 UTC",
  "RemoteUrl": "https://github.com/massimoaria/contentanalysis",
  "RemoteRef": "HEAD",
  "RemoteSha": "ce6d66cdeefae5658e21e25ae897d7452a96e691",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-19 16:57:29 UTC",
    "User": "root"
  },
  "Author": "Massimo Aria [cre, aut, cph] (ORCID:\n<https://orcid.org/0000-0002-8517-9411>),\nCorrado Cuccurullo [aut] (ORCID:\n<https://orcid.org/0000-0002-7401-8575>)",
  "Maintainer": "Massimo Aria <aria@unina.it>",
  "MD5sum": "2d5d4e8759c0b5a16af38b6d1404b4ba",
  "_user": "massimoaria",
  "_type": "src",
  "_file": "contentanalysis_1.1.0.9000.tar.gz",
  "_fileid": "aa5de38739913bbcde647bdc457de796a783629ba1bec439bdc334fe455b4c58",
  "_filesize": 1102754,
  "_sha256": "aa5de38739913bbcde647bdc457de796a783629ba1bec439bdc334fe455b4c58",
  "_created": "2026-05-19T16:57:29.000Z",
  "_published": "2026-06-02T14:22:31.054Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 79093264187,
      "time": 176,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7090099691"
    },
    {
      "job": 79093264230,
      "time": 174,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7090099009"
    },
    {
      "job": 79093264197,
      "time": 148,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7090095397"
    },
    {
      "job": 79093263627,
      "time": 137,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7090098792"
    },
    {
      "job": 79093263497,
      "time": 212,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7090031371"
    },
    {
      "job": 79093262877,
      "time": 130,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7360444680"
    },
    {
      "job": 79093263851,
      "time": 138,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7090085497"
    },
    {
      "job": 79093264527,
      "time": 121,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7090080173"
    },
    {
      "job": 79093264384,
      "time": 123,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7090080012"
    }
  ],
  "_buildurl": "https://github.com/r-universe/massimoaria/actions/runs/26112091050",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/massimoaria/contentanalysis",
  "_commit": {
    "id": "ce6d66cdeefae5658e21e25ae897d7452a96e691",
    "author": "Massimo Aria <16023543+massimoaria@users.noreply.github.com>",
    "committer": "Massimo Aria <16023543+massimoaria@users.noreply.github.com>",
    "message": "1.1.0.9000\n",
    "time": 1779206151
  },
  "_maintainer": {
    "name": "Massimo Aria",
    "email": "aria@unina.it",
    "login": "massimoaria",
    "description": "Massimo Aria is a full professor in Statistics for Social Sciences at the Department of Economics and Statistics of the University of Naples Federico II",
    "uuid": 16023543,
    "orcid": "0000-0002-8517-9411"
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 4.1.0",
      "role": "Depends"
    },
    {
      "package": "base64enc",
      "version": ">= 0.1-3",
      "role": "Imports"
    },
    {
      "package": "dplyr",
      "version": ">= 1.1.0",
      "role": "Imports"
    },
    {
      "package": "httr2",
      "version": ">= 0.2.0",
      "role": "Imports"
    },
    {
      "package": "igraph",
      "role": "Imports"
    },
    {
      "package": "jsonlite",
      "version": ">= 2.0.0",
      "role": "Imports"
    },
    {
      "package": "magrittr",
      "version": ">= 2.0.4",
      "role": "Imports"
    },
    {
      "package": "openalexR",
      "version": ">= 2.0.2",
      "role": "Imports"
    },
    {
      "package": "pdftools",
      "version": ">= 3.6.0",
      "role": "Imports"
    },
    {
      "package": "purrr",
      "version": ">= 1.1.0",
      "role": "Imports"
    },
    {
      "package": "stringr",
      "version": ">= 1.5.2",
      "role": "Imports"
    },
    {
      "package": "tibble",
      "version": ">= 3.3.0",
      "role": "Imports"
    },
    {
      "package": "tidyr",
      "version": ">= 1.3.0",
      "role": "Imports"
    },
    {
      "package": "tidytext",
      "version": ">= 0.4.3",
      "role": "Imports"
    },
    {
      "package": "visNetwork",
      "version": ">= 2.1.4",
      "role": "Imports"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "plotly",
      "role": "Suggests"
    },
    {
      "package": "RColorBrewer",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "scales",
      "role": "Suggests"
    },
    {
      "package": "stringdist",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    },
    {
      "package": "mockery",
      "role": "Suggests"
    }
  ],
  "_owner": "massimoaria",
  "_selfowned": true,
  "_usedby": 2,
  "_updates": [
    {
      "week": "2025-40",
      "n": 2
    },
    {
      "week": "2025-41",
      "n": 25
    },
    {
      "week": "2025-43",
      "n": 7
    },
    {
      "week": "2025-44",
      "n": 5
    },
    {
      "week": "2025-46",
      "n": 4
    },
    {
      "week": "2025-49",
      "n": 2
    },
    {
      "week": "2025-50",
      "n": 6
    },
    {
      "week": "2025-51",
      "n": 1
    },
    {
      "week": "2026-09",
      "n": 8
    },
    {
      "week": "2026-10",
      "n": 11
    },
    {
      "week": "2026-11",
      "n": 2
    },
    {
      "week": "2026-13",
      "n": 8
    },
    {
      "week": "2026-14",
      "n": 1
    },
    {
      "week": "2026-21",
      "n": 3
    }
  ],
  "_tags": [
    {
      "name": "v0.2.1",
      "date": "2025-12-11"
    },
    {
      "name": "v1.0.0",
      "date": "2026-03-07"
    },
    {
      "name": "v1.1.0",
      "date": "2026-05-19"
    }
  ],
  "_stars": 2,
  "_contributors": [
    {
      "user": "massimoaria",
      "count": 89,
      "uuid": 16023543
    },
    {
      "user": "copilot",
      "count": 2,
      "uuid": 198982749
    }
  ],
  "_userbio": {
    "uuid": 16023543,
    "type": "user",
    "name": "Massimo Aria",
    "description": "Massimo Aria is a full professor in Statistics for Social Sciences at the Department of Economics and Statistics of the University of Naples Federico II"
  },
  "_downloads": {
    "count": 22209,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/contentanalysis"
  },
  "_devurl": "https://github.com/massimoaria/contentanalysis",
  "_searchresults": 21,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contentanalysis.html",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/massimoaria/contentanalysis",
  "_realowner": "massimoaria",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.2.0",
      "date": "2025-10-30"
    },
    {
      "version": "0.2.1",
      "date": "2025-12-12"
    },
    {
      "version": "1.0.0",
      "date": "2026-03-07"
    },
    {
      "version": "1.1.0",
      "date": "2026-05-19"
    }
  ],
  "_exports": [
    "%>%",
    "analyze_scientific_content",
    "calculate_readability_indices",
    "calculate_word_distribution",
    "classify_rhetorical_moves",
    "create_citation_network",
    "describe_citation_clusters",
    "extract_doi_from_pdf",
    "extract_pdf_metadata",
    "gemini_content_ai",
    "get_crossref_references",
    "get_example_paper",
    "map_citations_to_segments",
    "match_citations_to_references",
    "merge_text_chunks_named",
    "normalize_references_section",
    "parse_references_section",
    "pdf2txt_auto",
    "pdf2txt_multicolumn_safe",
    "plot_citation_clusters",
    "plot_word_distribution",
    "process_large_pdf",
    "readability_multiple",
    "split_into_sections"
  ],
  "_help": [
    {
      "page": "analyze_scientific_content",
      "title": "Enhanced scientific content analysis with citation extraction",
      "topics": [
        "analyze_scientific_content"
      ]
    },
    {
      "page": "calculate_readability_indices",
      "title": "Calculate readability indices for text",
      "topics": [
        "calculate_readability_indices"
      ]
    },
    {
      "page": "calculate_word_distribution",
      "title": "Calculate word distribution across text segments or sections",
      "topics": [
        "calculate_word_distribution"
      ]
    },
    {
      "page": "classify_rhetorical_moves",
      "title": "Classify Rhetorical Moves in Scientific Text",
      "topics": [
        "classify_rhetorical_moves"
      ]
    },
    {
      "page": "create_citation_network",
      "title": "Create Citation Co-occurrence Network",
      "topics": [
        "create_citation_network"
      ]
    },
    {
      "page": "describe_citation_clusters",
      "title": "Describe Citation Clusters by Section Using Reference Title N-grams",
      "topics": [
        "describe_citation_clusters"
      ]
    },
    {
      "page": "extract_doi_from_pdf",
      "title": "Extract DOI from PDF Metadata (Legacy Function)",
      "topics": [
        "extract_doi_from_pdf"
      ]
    },
    {
      "page": "extract_pdf_metadata",
      "title": "Extract DOI and Metadata from PDF",
      "topics": [
        "extract_pdf_metadata"
      ]
    },
    {
      "page": "gemini_content_ai",
      "title": "Process Content with Google Gemini AI",
      "topics": [
        "gemini_content_ai"
      ]
    },
    {
      "page": "get_crossref_references",
      "title": "Retrieve rich metadata from the CrossRef API for a given DOI",
      "topics": [
        "get_crossref_references"
      ]
    },
    {
      "page": "get_example_paper",
      "title": "Get path to example paper",
      "topics": [
        "get_example_paper"
      ]
    },
    {
      "page": "match_citations_to_references",
      "title": "Match citations to references",
      "topics": [
        "match_citations_to_references"
      ]
    },
    {
      "page": "merge_text_chunks_named",
      "title": "Merge Text Chunks into Named Sections",
      "topics": [
        "merge_text_chunks_named"
      ]
    },
    {
      "page": "normalize_references_section",
      "title": "Normalize references section formatting",
      "topics": [
        "normalize_references_section"
      ]
    },
    {
      "page": "parse_references_section",
      "title": "Parse references section from text",
      "topics": [
        "parse_references_section"
      ]
    },
    {
      "page": "pdf2txt_auto",
      "title": "Import PDF with Automatic Section Detection",
      "topics": [
        "pdf2txt_auto"
      ]
    },
    {
      "page": "pdf2txt_multicolumn_safe",
      "title": "Extract text from multi-column PDF with structure preservation",
      "topics": [
        "pdf2txt_multicolumn_safe"
      ]
    },
    {
      "page": "plot_citation_clusters",
      "title": "Plot Citation Cluster Descriptions",
      "topics": [
        "plot_citation_clusters"
      ]
    },
    {
      "page": "plot_word_distribution",
      "title": "Create interactive word distribution plot",
      "topics": [
        "plot_word_distribution"
      ]
    },
    {
      "page": "print.rhetorical_move_analysis",
      "title": "Print method for rhetorical move analysis",
      "topics": [
        "print.rhetorical_move_analysis"
      ]
    },
    {
      "page": "process_large_pdf",
      "title": "Process Large PDF Documents with Google Gemini AI",
      "topics": [
        "process_large_pdf"
      ]
    },
    {
      "page": "readability_multiple",
      "title": "Calculate readability indices for multiple texts",
      "topics": [
        "readability_multiple"
      ]
    },
    {
      "page": "remove_all_tables",
      "title": "Remove All Types of Tables (Markdown and Plain Text)",
      "topics": [
        "remove_all_tables"
      ]
    },
    {
      "page": "remove_code_blocks",
      "title": "Remove Markdown Code Block Markers",
      "topics": [
        "remove_code_blocks"
      ]
    },
    {
      "page": "remove_figure_caps",
      "title": "Remove Figure Captions",
      "topics": [
        "remove_figure_caps"
      ]
    },
    {
      "page": "split_into_sections",
      "title": "Split document text into sections",
      "topics": [
        "split_into_sections"
      ]
    }
  ],
  "_readme": "https://github.com/massimoaria/contentanalysis/raw/HEAD/README.md",
  "_rundeps": [
    "askpass",
    "base64enc",
    "bslib",
    "cachem",
    "cli",
    "cpp11",
    "curl",
    "digest",
    "dplyr",
    "evaluate",
    "fastmap",
    "fontawesome",
    "fs",
    "generics",
    "glue",
    "highr",
    "htmltools",
    "htmlwidgets",
    "httr",
    "httr2",
    "igraph",
    "janeaustenr",
    "jquerylib",
    "jsonlite",
    "knitr",
    "lattice",
    "lifecycle",
    "magrittr",
    "Matrix",
    "memoise",
    "mime",
    "openalexR",
    "openssl",
    "pdftools",
    "pillar",
    "pkgconfig",
    "purrr",
    "qpdf",
    "R6",
    "rappdirs",
    "Rcpp",
    "rlang",
    "rmarkdown",
    "sass",
    "SnowballC",
    "stringi",
    "stringr",
    "sys",
    "tibble",
    "tidyr",
    "tidyselect",
    "tidytext",
    "tinytex",
    "tokenizers",
    "utf8",
    "vctrs",
    "visNetwork",
    "withr",
    "xfun",
    "yaml"
  ],
  "_vignettes": [
    {
      "source": "introduction.Rmd",
      "filename": "introduction.html",
      "title": "contentanalysis",
      "author": "By Massimo Aria",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Introduction",
        "What Makes It Unique?",
        "The Complete Workflow",
        "Getting Started",
        "Download Example Paper",
        "PDF Import and Section Detection",
        "Basic Import",
        "Manual Column Specification",
        "Comprehensive Content Analysis with API Enrichment",
        "Full Analysis with CrossRef and OpenAlex Integration",
        "Understanding the Results",
        "Summary Statistics",
        "Working with Enriched Reference Data",
        "Exploring Reference Sources",
        "Accessing OpenAlex Metadata",
        "Citation-Reference Matching Quality",
        "Citation Analysis",
        "Citation Extraction",
        "Citation Type Analysis",
        "Citation Contexts",
        "Citation Network Visualization",
        "Creating the Network",
        "Understanding Network Features",
        "Network Statistics",
        "Customizing the Network",
        "Interpreting the Network",
        "Citation Co-occurrence Data",
        "Text Analysis",
        "Word Frequencies",
        "N-gram Analysis",
        "Readability Metrics",
        "Word Distribution Analysis",
        "Visualization",
        "Advanced Examples",
        "Finding Specific Citations",
        "Analyzing Highly Cited References",
        "Custom Stopwords",
        "Segment-based Analysis",
        "Setting Up External API Access",
        "CrossRef API",
        "OpenAlex API",
        "Export Results",
        "Save to CSV",
        "Workflow for Multiple Papers",
        "Conclusion",
        "Key Advantages",
        "Additional Resources"
      ],
      "created": "2025-10-06 06:06:00",
      "modified": "2025-10-23 10:53:28",
      "commits": 9
    }
  ],
  "_score": 7.593027584897666,
  "_indexed": true,
  "_nocasepkg": "contentanalysis",
  "_universes": [
    "massimoaria"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "1.1.0.9000",
      "date": "2026-05-19T17:00:00.000Z",
      "distro": "noble",
      "commit": "ce6d66cdeefae5658e21e25ae897d7452a96e691",
      "fileid": "bbbacd018c63d2f2b364855e33687cd38a5cee14f9510e8d0ce82b0e16513815",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/massimoaria/actions/runs/26112091050"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "1.1.0.9000",
      "date": "2026-05-19T16:59:59.000Z",
      "distro": "noble",
      "commit": "ce6d66cdeefae5658e21e25ae897d7452a96e691",
      "fileid": "80963348483ce106c12eabd956dc9fe249e44e15dd7e6558e62272d8e07f9849",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/massimoaria/actions/runs/26112091050"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "1.1.0.9000",
      "date": "2026-05-19T16:59:54.000Z",
      "commit": "ce6d66cdeefae5658e21e25ae897d7452a96e691",
      "fileid": "be8b3528249feafbdcaed55c2228e717222f5728d214a47bcdfc678fe1df37c4",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/massimoaria/actions/runs/26112091050"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "1.1.0.9000",
      "date": "2026-05-19T17:00:07.000Z",
      "commit": "ce6d66cdeefae5658e21e25ae897d7452a96e691",
      "fileid": "93f8868f5d082e6f8eba9739f0ca63dca89a772ce576c2869eaf5d91716eb0ed",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/massimoaria/actions/runs/26112091050"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "1.1.0.9000",
      "date": "2026-05-19T16:59:07.000Z",
      "commit": "ce6d66cdeefae5658e21e25ae897d7452a96e691",
      "fileid": "c0cac015fb355cf22dbbf9608b14799925a1e09217ca3c68dfa3c0d202f29aa9",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/massimoaria/actions/runs/26112091050"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "1.1.0.9000",
      "date": "2026-05-19T16:59:01.000Z",
      "commit": "ce6d66cdeefae5658e21e25ae897d7452a96e691",
      "fileid": "29f514f8bdc70085dfef67d69debaffa5c89e0818be9cf88517537d6cb3d27a7",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/massimoaria/actions/runs/26112091050"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "1.1.0.9000",
      "date": "2026-05-19T16:58:56.000Z",
      "commit": "ce6d66cdeefae5658e21e25ae897d7452a96e691",
      "fileid": "7f3729f2f64dbbeb17f71e7d8d5860acfffbfbf1e3568cef7e0470aae5b9fea4",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/massimoaria/actions/runs/26112091050"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "1.1.0.9000",
      "date": "2026-06-02T14:22:02.000Z",
      "commit": "ce6d66cdeefae5658e21e25ae897d7452a96e691",
      "fileid": "aadce1565f1344ddf38b51a5af5b447ef5abcbba734d666c2e1b402c6d893c95",
      "status": "success",
      "buildurl": "https://github.com/r-universe/massimoaria/actions/runs/26112091050"
    }
  ]
}