DurationSampler

Bases: Sampler

A class to sample durations based on a specified purpose, extending the Sampler class.

Methods:

Name Description
sample_duration

Sample a duration for a given purpose, optionally constrained by minimum and maximum duration.

Source code in tripsender\sampler.py
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
class DurationSampler(Sampler):
    """
    A class to sample durations based on a specified purpose, extending the Sampler class.

    Methods:
        sample_duration(purpose, min_duration=None, max_duration=None):
            Sample a duration for a given purpose, optionally constrained by minimum and maximum duration.
    """
    def sample_duration(self, purpose, min_duration=None, max_duration=None):
        """
        Sample a duration, in whole minutes, for the given purpose.

        The raw draw is multiplied by 60 before use (so the configured
        distribution is presumably expressed in hours -- confirm against the
        input data) and rounded to the nearest minute. Draws are repeated
        until the rounded value lies within the requested bounds.

        Args:
            purpose (str): Purpose label; matched case-insensitively against
                the keys of ``self.json_data``.
            min_duration: Optional inclusive lower bound, in minutes.
            max_duration: Optional inclusive upper bound, in minutes.

        Returns:
            The sampled duration rounded to the nearest minute, or ``None``
            (after printing a message) when no entry exists for ``purpose``.

        Raises:
            ValueError: If both bounds are given and ``min_duration`` exceeds
                ``max_duration``; no draw could ever satisfy them.
        """
        # Make purpose case insensitive
        purpose = purpose.lower()
        # Make labels in json_data case insensitive
        self.json_data = {k.lower(): v for k, v in self.json_data.items()}

        if purpose not in self.json_data:
            print(f"No data found for purpose: {purpose}")
            return None
        data = self.json_data[purpose]

        # Reject contradictory bounds up front; otherwise the rejection loop
        # below would never terminate.
        if (min_duration is not None and max_duration is not None
                and min_duration > max_duration):
            raise ValueError(
                f"min_duration ({min_duration}) must not exceed max_duration ({max_duration})"
            )

        while True:
            sample = self.sample_from_distribution(data['distribution'], data['parameters'])
            # Round every draw (including the first) so the return value is
            # always a whole number of minutes.
            sample_minutes = round(sample * 60)

            too_short = min_duration is not None and sample_minutes < min_duration
            too_long = max_duration is not None and sample_minutes > max_duration
            if not (too_short or too_long):
                return sample_minutes

Sampler

A class to sample from various statistical distributions.

Attributes:

Name Type Description
json_data dict

The input JSON data containing distribution parameters.

gmm_cache dict

Cache for Gaussian Mixture Model (GMM) objects.

Methods:

Name Description
_get_gmm

Retrieve or create a Gaussian Mixture Model (GMM) with the specified number of components and parameters.

sample_from_distribution

Sample from the specified statistical distribution using the provided parameters.

Source code in tripsender\sampler.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
class Sampler:
    """
    A class to sample from various statistical distributions.

    Attributes:
        json_data (dict): The input JSON data containing distribution parameters.
        gmm_cache (dict): Cache for Gaussian Mixture Model (GMM) objects.

    Methods:
        _get_gmm(num_components, parameters):
            Retrieve or create a Gaussian Mixture Model (GMM) with the specified number of components and parameters.
        sample_from_distribution(distribution_type, parameters):
            Sample from the specified statistical distribution using the provided parameters.
    """
    def __init__(self, json_data):
        """Store the distribution configuration and start with an empty GMM cache."""
        self.json_data = json_data
        self.gmm_cache = {}  # Cache for GMM objects

    @staticmethod
    def _gmm_cache_key(num_components, parameters):
        """Build a hashable cache key from the component count and the parameter values."""
        parts = []
        for part in parameters:
            arr = np.asarray(part, dtype=float)
            # Shape plus raw bytes identifies the values without relying on
            # the (unhashable) nested lists themselves.
            parts.append((arr.shape, arr.tobytes()))
        return (num_components, tuple(parts))

    def _get_gmm(self, num_components, parameters):
        """
        Return a GaussianMixture configured from ``parameters``, building it on first use.

        Args:
            num_components (int): Number of mixture components.
            parameters: Sequence read as ``[weights, means, covariances]``
                (indices 0, 1 and 2 respectively).

        Returns:
            The cached or newly created ``GaussianMixture`` whose ``weights_``,
            ``means_`` and ``covariances_`` are assigned directly from
            ``parameters`` (no fitting is performed).
        """
        # The key includes the parameter values: keying on the component count
        # alone would hand back the first GMM ever built to every later
        # distribution with the same number of components.
        cache_key = self._gmm_cache_key(num_components, parameters)
        if cache_key not in self.gmm_cache:
            # If not in cache, create a new GMM
            gmm = GaussianMixture(n_components=num_components, covariance_type='full')

            # Set means as (num_components, n_features)
            means = np.array(parameters[1]).reshape(num_components, -1)
            gmm.means_ = means
            n_features = means.shape[1]

            # Set covariances and ensure they are shaped
            # (num_components, n_features, n_features) for 'full' covariance type
            covariances = np.array(parameters[2])
            if covariances.ndim < 3:
                covariances = covariances.reshape(num_components, n_features, n_features)
            gmm.covariances_ = covariances

            # Set weights and normalize them so they sum to 1
            weights = np.array(parameters[0]).flatten()
            gmm.weights_ = weights / weights.sum()

            # Store the newly created GMM in the cache
            self.gmm_cache[cache_key] = gmm

        # Return the GMM (either from cache or the newly created one)
        return self.gmm_cache[cache_key]

    def sample_from_distribution(self, distribution_type, parameters):
        """
        Draw a single sample from the named distribution.

        Args:
            distribution_type (str): One of 'gamma', 'invgauss', 'lognorm',
                'genextreme', 'weibull_max', 'bimodal' or 'trimodal'.
            parameters: For the scipy distributions, the positional arguments
                passed to ``rvs``; for 'bimodal'/'trimodal', the
                ``[weights, means, covariances]`` of a 2- or 3-component GMM.

        Returns:
            One sampled value. For the GMM types this is the first feature of
            the single drawn sample, converted to ``float``.

        Raises:
            ValueError: If ``distribution_type`` is not supported.
        """
        if distribution_type == 'gamma':
            return gamma.rvs(*parameters)
        elif distribution_type == 'invgauss':
            return invgauss.rvs(*parameters)
        elif distribution_type == 'lognorm':
            return lognorm.rvs(*parameters)
        elif distribution_type == 'genextreme':
            return genextreme.rvs(*parameters)
        elif distribution_type == 'weibull_max':
            return weibull_max.rvs(*parameters)
        elif distribution_type in ['bimodal', 'trimodal']:
            num_components = 2 if distribution_type == 'bimodal' else 3
            gmm = self._get_gmm(num_components, parameters)
            # sample() returns (X, labels); take the first feature of the first row
            return float(gmm.sample()[0][0][0])
        else:
            raise ValueError(f"Unsupported distribution type: {distribution_type}")

StartTimeSampler

Bases: Sampler

A class to sample start times based on a specified purpose, extending the Sampler class.

Methods:

Name Description
sample_start_time

Sample a start time for a given purpose, optionally constrained by minimum and maximum time.

Source code in tripsender\sampler.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
class StartTimeSampler(Sampler):
    """
    A class to sample start times based on a specified purpose, extending the Sampler class.

    Methods:
        sample_start_time(purpose, min_time=None, max_time=None):
            Sample a start time for a given purpose, optionally constrained by minimum and maximum time.
    """
    def sample_start_time(self, purpose, min_time=None, max_time=None):
        """
        Sample a start time for the given purpose and format it as 'HHMM'.

        Each draw is folded into the range [0, 24) via ``abs(sample) % 24`` and
        compared against the bounds as a fractional hour. Draws are repeated
        until the value lies within the requested bounds.

        Args:
            purpose: Purpose label. An exact key match in ``self.json_data``
                is preferred; otherwise string keys are matched
                case-insensitively.
            min_time: Optional inclusive lower bound, in hours.
            max_time: Optional inclusive upper bound, in hours.

        Returns:
            str: The sampled time as a zero-padded 'HHMM' string, or ``None``
            (after printing a message) when no entry exists for ``purpose``.

        Raises:
            ValueError: If the bounds can never be met: ``min_time`` greater
                than ``max_time``, ``min_time`` of 24 or more, or a negative
                ``max_time``.
        """
        key = purpose
        if key not in self.json_data and isinstance(purpose, str):
            # Fall back to a case-insensitive match on string keys
            lowered = purpose.lower()
            key = next(
                (k for k in self.json_data if isinstance(k, str) and k.lower() == lowered),
                purpose,
            )
        if key not in self.json_data:
            print(f"No data found for purpose: {purpose}")
            return None
        data = self.json_data[key]

        # Sampled times always lie in [0, 24); reject bounds that no draw can
        # satisfy instead of looping forever.
        if min_time is not None and max_time is not None and min_time > max_time:
            raise ValueError(f"min_time ({min_time}) must not exceed max_time ({max_time})")
        if min_time is not None and min_time >= 24:
            raise ValueError(f"min_time ({min_time}) must be less than 24")
        if max_time is not None and max_time < 0:
            raise ValueError(f"max_time ({max_time}) must not be negative")

        while True:
            sample = self.sample_from_distribution(data['distribution'], data['parameters'])
            sample_time = abs(sample) % 24  # Wrap around to fit within 24 hours and ensure non-negative

            too_early = min_time is not None and sample_time < min_time
            too_late = max_time is not None and sample_time > max_time
            if not (too_early or too_late):
                break

        # Return in 'HHMM' format
        return f"{int(sample_time):02d}{int((sample_time * 60) % 60):02d}"