[AUR-401] Disable Haystack telemetry with monkey patching (#1)

Sample telemetry payload that Haystack emits when running a pipeline. Note: `pipeline.classname` can leak company information.

```json
{
  "hardware.cpus": 16,
  "hardware.gpus": 0,
  "libraries.colab": false,
  "libraries.cuda": false,
  "libraries.haystack": "1.20.0rc0",
  "libraries.ipython": false,
  "libraries.pytest": false,
  "libraries.ray": false,
  "libraries.torch": false,
  "libraries.transformers": "4.31.0",
  "os.containerized": false,
  "os.family": "Linux",
  "os.machine": "x86_64",
  "os.version": "6.2.0-26-generic",
  "pipeline.classname": "TempPipeline",
  "pipeline.config_hash": "07a8eddd5a6e512c0d898c6d9f445ed9",
  "pipeline.nodes.PromptNode": 1,
  "pipeline.nodes.Shaper": 1,
  "pipeline.nodes.WebRetriever": 1,
  "pipeline.run_parameters.debug": false,
  "pipeline.run_parameters.documents": [
    0
  ],
  "pipeline.run_parameters.file_paths": 0,
  "pipeline.run_parameters.labels": 0,
  "pipeline.run_parameters.meta": 1,
  "pipeline.run_parameters.params": false,
  "pipeline.run_parameters.queries": true,
  "pipeline.runs": 1,
  "pipeline.type": "Query",
  "python.version": "3.10.12"
}
```

Solution: Haystack telemetry relies on the `telemetry` variable, the `posthog` library, and the `HAYSTACK_TELEMETRY_ENABLED` environment variable. We set the environment variable to `False` and make sure the relevant objects are disabled.
This commit is contained in:
Nguyen Trung Duc (john)
2023-08-22 10:02:46 +07:00
committed by GitHub
parent 043209fda7
commit e9d1d5c118
4 changed files with 548 additions and 0 deletions

21
tests/test_telemetry.py Normal file
View File

@@ -0,0 +1,21 @@
def test_disable_telemetry_import_haystack_first():
    """Check that importing kotaemon disables telemetry when Haystack is loaded first.

    The test imports ``haystack.telemetry``, verifies that telemetry is still
    active, then imports ``kotaemon`` and verifies that the ``telemetry`` object
    has been set to ``None`` and ``HAYSTACK_TELEMETRY_ENABLED`` is ``"False"``.
    """
    import os
    import sys

    # Imports are cached per interpreter: if any earlier test already imported
    # kotaemon (or haystack), the "before" assertions below would fail or the
    # imports would be no-ops.  Drop the cached modules and the environment
    # variable so the import order is exercised from a clean slate.
    for module_name in list(sys.modules):
        if module_name.split(".", 1)[0] in ("haystack", "kotaemon"):
            del sys.modules[module_name]
    os.environ.pop("HAYSTACK_TELEMETRY_ENABLED", None)

    import haystack.telemetry

    # Precondition: telemetry is still enabled before kotaemon is imported.
    assert haystack.telemetry.telemetry is not None
    assert os.environ.get("HAYSTACK_TELEMETRY_ENABLED", "True") != "False"

    import kotaemon  # noqa: F401

    # Importing kotaemon must have switched telemetry off.
    assert haystack.telemetry.telemetry is None
    assert os.environ.get("HAYSTACK_TELEMETRY_ENABLED", "True") == "False"
def test_disable_telemetry_import_haystack_after_kotaemon():
    """Check that telemetry is disabled when kotaemon is imported before Haystack.

    The test imports ``kotaemon`` first, then ``haystack.telemetry``, and
    verifies that the ``telemetry`` object is ``None`` and
    ``HAYSTACK_TELEMETRY_ENABLED`` is ``"False"``.
    """
    import os
    import sys

    # Imports are cached per interpreter: if either package was already
    # imported, both import statements below are no-ops and the intended
    # "kotaemon first" order is never actually run.  Drop the cached modules
    # and the environment variable so the order is exercised for real.
    for module_name in list(sys.modules):
        if module_name.split(".", 1)[0] in ("haystack", "kotaemon"):
            del sys.modules[module_name]
    os.environ.pop("HAYSTACK_TELEMETRY_ENABLED", None)

    import kotaemon  # noqa: F401

    import haystack.telemetry

    assert haystack.telemetry.telemetry is None
    assert os.environ.get("HAYSTACK_TELEMETRY_ENABLED", "True") == "False"