summaryrefslogtreecommitdiff
path: root/pkgs/development/python-modules/readabilipy/default.nix
blob: e220f5caff29eb49b513810a40505ae4af83ef17 (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
{
  lib,
  beautifulsoup4,
  buildPythonPackage,
  buildNpmPackage,
  fetchFromGitHub,
  html5lib,
  lxml,
  nodejs,
  pytestCheckHook,
  pythonOlder,
  regex,
  setuptools,
  testers,
  readabilipy,
}:

# Python package for ReadabiliPy, bundled with the Node.js dependencies that
# live under readabilipy/javascript in the upstream source tree.
buildPythonPackage rec {
  pname = "readabilipy";
  version = "0.3.0";
  pyproject = true;

  disabled = pythonOlder "3.7";

  src = fetchFromGitHub {
    owner = "alan-turing-institute";
    repo = "ReadabiliPy";
    tag = "v${version}";
    hash = "sha256-FYdSbq3rm6fBHm5fDRAB0airX9fNcUGs1wHN4i6mnG0=";
  };

  # npm dependencies of the bundled JavaScript helper, built from the same
  # source tree. Being a derivation attribute, its store path is available to
  # the build phases below as `$javascript`.
  javascript = buildNpmPackage {
    pname = "readabilipy-javascript";
    inherit version;

    inherit src;
    sourceRoot = "${src.name}/readabilipy/javascript";
    npmDepsHash = "sha256-LiPSCZamkJjivzpawG7H9IEXYjn3uzFeY2vfucyHfUo=";

    # Use the lock file vendored next to this expression; npmDepsHash is
    # computed against it.
    # NOTE(review): presumably upstream ships no usable package-lock.json - confirm.
    postPatch = ''
      cp ${./package-lock.json} package-lock.json
    '';

    # Only the installed node_modules are needed; there is nothing to build.
    dontNpmBuild = true;
  };

  build-system = [ setuptools ];

  dependencies = [
    beautifulsoup4
    html5lib
    lxml
    regex
  ];

  postPatch = ''
    # Make the pre-fetched npm dependencies visible inside the Python package.
    ln -s $javascript/lib/node_modules/ReadabiliPy/node_modules readabilipy/javascript/node_modules
    # Replace MANIFEST.in so everything under readabilipy/javascript is packaged.
    echo "recursive-include readabilipy/javascript *" >MANIFEST.in
  '';

  # Put node on PATH for the CLI. Passing this through makeWrapperArgs lets the
  # Python wrapper hook add it to the single wrapper it generates, instead of
  # wrapping the executable a second time by hand.
  makeWrapperArgs = [
    "--prefix PATH : ${nodejs}/bin"
  ];

  nativeCheckInputs = [
    pytestCheckHook
    nodejs
  ];

  pythonImportsCheck = [ "readabilipy" ];

  disabledTestPaths = [
    # Exclude benchmarks
    "tests/test_benchmarking.py"
  ];

  passthru = {
    # Checks the version string of the installed CLI, including the reported
    # Readability.js support.
    tests.version = testers.testVersion {
      package = readabilipy;
      command = "readabilipy --version";
      version = "${version} (Readability.js supported: yes)";
    };
  };

  meta = {
    description = "HTML content extractor";
    mainProgram = "readabilipy";
    homepage = "https://github.com/alan-turing-institute/ReadabiliPy";
    changelog = "https://github.com/alan-turing-institute/ReadabiliPy/blob/${src.tag}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ fab ];
  };
}