Apply ruff flake8-comprehensions (#21694)

This commit is contained in:
Aaron Gokaslan
2023-02-22 03:14:54 -05:00
committed by GitHub
parent df06fb1f0b
commit 5e8c8eb5ba
230 changed files with 971 additions and 955 deletions

View File

@@ -29,7 +29,7 @@ def get_min_hash(tokens: List[str]) -> Optional[MinHash]:
def get_tokens(code: str) -> Set[str]:
"""Tokenize a code snippet."""
return set([t for t in NON_ALPHA.split(code) if len(t.strip()) > 0])
return {t for t in NON_ALPHA.split(code) if len(t.strip()) > 0}
class DuplicationIndex:
@@ -243,7 +243,7 @@ def deduplicate_dataset(
>>> ds_dedup, duplicate_clusters = deduplicate_dataset(ds, jaccard_threshold=0.85)
"""
duplicate_clusters = make_duplicate_clusters(dataset, jaccard_threshold)
duplicate_indices = set(x["base_index"] for cluster in duplicate_clusters for x in cluster)
duplicate_indices = {x["base_index"] for cluster in duplicate_clusters for x in cluster}
extreme_dict = {}
extremes_clusters = find_extremes(duplicate_clusters, dataset, jaccard_threshold)
for extremes in extremes_clusters: